From faa2a06b1a621c92844e30cdb2114d05af285fa9 Mon Sep 17 00:00:00 2001 From: Szymon Date: Sun, 19 Nov 2023 08:31:37 +0100 Subject: [PATCH 1/3] Modified classification metrics --- GANDLF/metrics/classification.py | 79 ++++++++++++++++++++++++-------- 1 file changed, 59 insertions(+), 20 deletions(-) diff --git a/GANDLF/metrics/classification.py b/GANDLF/metrics/classification.py index d79ec752d..05e9c8e38 100644 --- a/GANDLF/metrics/classification.py +++ b/GANDLF/metrics/classification.py @@ -1,4 +1,5 @@ import torchmetrics as tm +from torch.nn.functional import one_hot from ..utils import get_output_from_calculator @@ -26,42 +27,80 @@ def overall_stats(predictions, ground_truth, params): "per_class_average": "macro", "per_class_weighted": "weighted", } + task = "binary" if params["model"]["num_classes"] == 2 else "multiclass" + # consider adding a "multilabel field in the future" # metrics that need the "average" parameter + for average_type, average_type_key in average_types_keys.items(): calculators = { "accuracy": tm.Accuracy( - num_classes=params["model"]["num_classes"], average=average_type_key + task=task, + num_classes=params["model"]["num_classes"], + average=average_type_key, ), "precision": tm.Precision( - num_classes=params["model"]["num_classes"], average=average_type_key + task=task, + num_classes=params["model"]["num_classes"], + average=average_type_key, ), "recall": tm.Recall( - num_classes=params["model"]["num_classes"], average=average_type_key + task=task, + num_classes=params["model"]["num_classes"], + average=average_type_key, ), "f1": tm.F1Score( - num_classes=params["model"]["num_classes"], average=average_type_key + task=task, + num_classes=params["model"]["num_classes"], + average=average_type_key, ), "specificity": tm.Specificity( - num_classes=params["model"]["num_classes"], average=average_type_key + task=task, + num_classes=params["model"]["num_classes"], + average=average_type_key, ), ## weird error for multi-class problem, where pos_label is not getting set - # "aucroc": tm.AUROC( - # num_classes=params["model"]["num_classes"], average=average_type_key - # ), + "aucroc": tm.AUROC( + task=task, + num_classes=params["model"]["num_classes"], + average=average_type_key + if average_type_key is not "micro" + else "macro", + ), } for metric_name, calculator in calculators.items(): - output_metrics[ - f"{metric_name}_{average_type}" - ] = get_output_from_calculator(predictions, ground_truth, calculator) + if metric_name == "aucroc": + one_hot_preds = one_hot( + predictions.long(), num_classes=params["model"]["num_classes"] + ) + output_metrics[metric_name] = get_output_from_calculator( + one_hot_preds.float(), ground_truth, calculator + ) + else: + output_metrics[metric_name] = get_output_from_calculator( + predictions, ground_truth, calculator + ) + + #### HERE WE NEED TO MODIFY TESTS - ROC IS RETURNING A TUPLE. WE MAY ALSO DISCRAD IT #### + # what is AUC metric telling at all? Computing it for predictions and ground truth + # is not making sense # metrics that do not have any "average" parameter - calculators = { - "auc": tm.AUC(reorder=True), - ## weird error for multi-class problem, where pos_label is not getting set - # "roc": tm.ROC(num_classes=params["model"]["num_classes"]), - } - for metric_name, calculator in calculators.items(): - output_metrics[metric_name] = get_output_from_calculator( - predictions, ground_truth, calculator - ) + # calculators = { + # + # # "auc": tm.AUC(reorder=True), + # ## weird error for multi-class problem, where pos_label is not getting set + # "roc": tm.ROC(task=task, num_classes=params["model"]["num_classes"]), + # } + # for metric_name, calculator in calculators.items(): + # if metric_name == "roc": + # one_hot_preds = one_hot( + # predictions.long(), num_classes=params["model"]["num_classes"] + # ) + # output_metrics[metric_name] = get_output_from_calculator( + # one_hot_preds.float(), ground_truth, calculator + # ) + # else: + # output_metrics[metric_name] = get_output_from_calculator( + # predictions, ground_truth, calculator + # ) return output_metrics From 97c395c6eb45b7fac350242f018f6a41e6b9f075 Mon Sep 17 00:00:00 2001 From: Szymon Date: Sun, 19 Nov 2023 09:34:28 +0100 Subject: [PATCH 2/3] Upgrade to torchmetrics 1.1.2 - added requirements in setup.py - modified the metrics constructors in the GANDLF.metrics.generic and .classification files to be compatibile with updated torchmetrics API - modified .yaml file to cover new fields compatibile with new torchmetrics API - defined utility functions for easier construction of metrics from new torchmetrics API --- GANDLF/metrics/classification.py | 6 +- GANDLF/metrics/generic.py | 77 ++++- GANDLF/utils/__init__.py | 1 + GANDLF/utils/generic.py | 23 +- setup.py | 14 +- testing/config_classification.yaml | 2 +- testing/test_full.py | 535 ++++++++++++++++++++++------- 7 files changed, 501 insertions(+), 157 deletions(-) diff --git a/GANDLF/metrics/classification.py b/GANDLF/metrics/classification.py index 05e9c8e38..791607ff2 100644 --- a/GANDLF/metrics/classification.py +++ b/GANDLF/metrics/classification.py @@ -1,6 +1,7 @@ import torchmetrics as tm from torch.nn.functional import one_hot from ..utils import get_output_from_calculator +from GANDLF.utils.generic import determine_task def overall_stats(predictions, ground_truth, params): @@ -27,7 +28,7 @@ def overall_stats(predictions, ground_truth, params): "per_class_average": "macro", "per_class_weighted": "weighted", } - task = "binary" if params["model"]["num_classes"] == 2 else "multiclass" + task = determine_task(params) # consider adding a "multilabel field in the future" # metrics that need the "average" parameter @@ -70,7 +71,8 @@ def overall_stats(predictions, ground_truth, params): for metric_name, calculator in calculators.items(): if metric_name == "aucroc": one_hot_preds = one_hot( - predictions.long(), num_classes=params["model"]["num_classes"] + predictions.long(), + num_classes=params["model"]["num_classes"], ) output_metrics[metric_name] = get_output_from_calculator( one_hot_preds.float(), ground_truth, calculator diff --git a/GANDLF/metrics/generic.py b/GANDLF/metrics/generic.py index 7f94ad8b8..6fc6cf2f7 100644 --- a/GANDLF/metrics/generic.py +++ b/GANDLF/metrics/generic.py @@ -1,23 +1,54 @@ import torch -from torchmetrics import F1Score, Precision, Recall, JaccardIndex, Accuracy, Specificity +from torchmetrics import ( + F1Score, + Precision, + Recall, + JaccardIndex, + Accuracy, + Specificity, +) from GANDLF.utils.tensor import one_hot +from GANDLF.utils.generic import determine_task -def generic_function_output_with_check(predicted_classes, label, metric_function): +def define_average_type_key(params, metric_name): + """Determine if the metric config defines the type of average to use. + If not, fallback to the default (macro) average type. + """ + if "average" in params["metrics"][metric_name]: + average_type_key = params["metrics"][metric_name]["average"] + else: + average_type_key = "macro" + UserWarning( + "WARNING: Average type not defined in config, using default (macro)." + ) + return average_type_key + + +def generic_function_output_with_check( + predicted_classes, label, metric_function +): if torch.min(predicted_classes) < 0: print( "WARNING: Negative values detected in prediction, cannot compute torchmetrics calculations." ) return torch.zeros((1), device=predicted_classes.device) else: + try: + max_clamp_val = metric_function.num_classes - 1 + except AttributeError: + max_clamp_val = 1 predicted_new = torch.clamp( - predicted_classes.cpu().int(), max=metric_function.num_classes - 1 + predicted_classes.cpu().int(), max=max_clamp_val ) predicted_new = predicted_new.reshape(label.shape) return metric_function(predicted_new, label.cpu().int()) -def generic_torchmetrics_score(output, label, metric_class, metric_key, params): +def generic_torchmetrics_score( + output, label, metric_class, metric_key, params +): + task = determine_task(params) num_classes = params["model"]["num_classes"] predicted_classes = output if params["problem_type"] == "classification": @@ -28,10 +59,9 @@ def generic_torchmetrics_score(output, label, metric_class, metric_key, params): params["metrics"][metric_key]["multi_class"] = False params["metrics"][metric_key]["mdmc_average"] = None metric_function = metric_class( + task=task, average=params["metrics"][metric_key]["average"], num_classes=num_classes, - multiclass=params["metrics"][metric_key]["multi_class"], - mdmc_average=params["metrics"][metric_key]["mdmc_average"], threshold=params["metrics"][metric_key]["threshold"], ) @@ -45,7 +75,9 @@ def recall_score(output, label, params): def precision_score(output, label, params): - return generic_torchmetrics_score(output, label, Precision, "precision", params) + return generic_torchmetrics_score( + output, label, Precision, "precision", params + ) def f1_score(output, label, params): @@ -53,11 +85,15 @@ def f1_score(output, label, params): def accuracy(output, label, params): - return generic_torchmetrics_score(output, label, Accuracy, "accuracy", params) + return generic_torchmetrics_score( + output, label, Accuracy, "accuracy", params + ) def specificity_score(output, label, params): - return generic_torchmetrics_score(output, label, Specificity, "specificity", params) + return generic_torchmetrics_score( + output, label, Specificity, "specificity", params + ) def iou_score(output, label, params): @@ -67,12 +103,23 @@ def iou_score(output, label, params): predicted_classes = torch.argmax(output, 1) elif params["problem_type"] == "segmentation": label = one_hot(label, params["model"]["class_list"]) - - recall = JaccardIndex( - reduction=params["metrics"]["iou"]["reduction"], - num_classes=num_classes, - threshold=params["metrics"]["iou"]["threshold"], - ) + task = determine_task(params) + if task == "binary": + recall = JaccardIndex( + task=task, + threshold=params["metrics"]["iou"]["threshold"], + ) + elif task == "multiclass": + recall = JaccardIndex( + task=task, + average=define_average_type_key(params, "iou"), + num_classes=num_classes, + threshold=params["metrics"]["iou"]["threshold"], + ) + else: + raise NotImplementedError( + "IoU score is not implemented for multilabel problems" + ) return generic_function_output_with_check( predicted_classes.cpu().int(), label.cpu().int(), recall diff --git a/GANDLF/utils/__init__.py b/GANDLF/utils/__init__.py index 97c8facef..cbaca51c6 100644 --- a/GANDLF/utils/__init__.py +++ b/GANDLF/utils/__init__.py @@ -50,6 +50,7 @@ suppress_stdout_stderr, set_determinism, print_and_format_metrics, + determine_task, ) from .modelio import ( diff --git a/GANDLF/utils/generic.py b/GANDLF/utils/generic.py index 159708225..83c0056aa 100644 --- a/GANDLF/utils/generic.py +++ b/GANDLF/utils/generic.py @@ -6,6 +6,7 @@ import SimpleITK as sitk from contextlib import contextmanager, redirect_stderr, redirect_stdout from os import devnull +from typing import Dict, Any, Union @contextmanager @@ -48,6 +49,15 @@ def checkPatchDivisibility(patch_size, number=16): return True +def determine_task(params: Dict[str, Union[Dict[str, Any], Any]]) -> str: + """Determine the task (binary or multiclass) from the model config. + Args: + params (dict): The parameter dictionary containing training and data information. + """ + task = "binary" if params["model"]["num_classes"] == 2 else "multiclass" + return task + + def get_date_time(): """ Get a well-parsed date string @@ -146,7 +156,10 @@ def checkPatchDimensions(patch_size, numlay): patch_size_to_check = patch_size_to_check[:-1] if all( - [x >= 2 ** (numlay + 1) and x % 2**numlay == 0 for x in patch_size_to_check] + [ + x >= 2 ** (numlay + 1) and x % 2**numlay == 0 + for x in patch_size_to_check + ] ): return numlay else: @@ -182,7 +195,9 @@ def get_array_from_image_or_tensor(input_tensor_or_image): elif isinstance(input_tensor_or_image, np.ndarray): return input_tensor_or_image else: - raise ValueError("Input must be a torch.Tensor or sitk.Image or np.ndarray") + raise ValueError( + "Input must be a torch.Tensor or sitk.Image or np.ndarray" + ) def set_determinism(seed=42): @@ -252,7 +267,9 @@ def __update_metric_from_list_to_single_string(input_metrics_dict) -> dict: output_metrics_dict = deepcopy(cohort_level_metrics) for metric in metrics_dict_from_parameters: if isinstance(sample_level_metrics[metric], np.ndarray): - to_print = (sample_level_metrics[metric] / length_of_dataloader).tolist() + to_print = ( + sample_level_metrics[metric] / length_of_dataloader + ).tolist() else: to_print = sample_level_metrics[metric] / length_of_dataloader output_metrics_dict[metric] = to_print diff --git a/setup.py b/setup.py index f3deacf2b..61b3d1d0e 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,9 @@ readme = readme_file.read() except Exception as error: readme = "No README information found." - sys.stderr.write("Warning: Could not open '%s' due %s\n" % ("README.md", error)) + sys.stderr.write( + "Warning: Could not open '%s' due %s\n" % ("README.md", error) + ) class CustomInstallCommand(install): @@ -39,7 +41,9 @@ def run(self): except Exception as error: __version__ = "0.0.1" - sys.stderr.write("Warning: Could not open '%s' due %s\n" % (filepath, error)) + sys.stderr.write( + "Warning: Could not open '%s' due %s\n" % (filepath, error) + ) # Handle cases where specific files need to be bundled into the final package as installed via PyPI dockerfiles = [ @@ -54,7 +58,9 @@ def run(self): ] setup_files = ["setup.py", ".dockerignore", "pyproject.toml", "MANIFEST.in"] all_extra_files = dockerfiles + entrypoint_files + setup_files -all_extra_files_pathcorrected = [os.path.join("../", item) for item in all_extra_files] +all_extra_files_pathcorrected = [ + os.path.join("../", item) for item in all_extra_files +] # find_packages should only ever find these as subpackages of gandlf, not as top-level packages # generate this dynamically? # GANDLF.GANDLF is needed to prevent recursion madness in deployments @@ -99,7 +105,7 @@ def run(self): "psutil", "medcam", "opencv-python", - "torchmetrics==0.8.1", + "torchmetrics==1.1.2", "zarr==2.10.3", "pydicom", "onnx", diff --git a/testing/config_classification.yaml b/testing/config_classification.yaml index d9b96e882..f61020d27 100644 --- a/testing/config_classification.yaml +++ b/testing/config_classification.yaml @@ -21,7 +21,7 @@ metrics: - recall - specificity - iou: { - reduction: sum, + average: micro, } modality: rad diff --git a/testing/test_full.py b/testing/test_full.py index 9dd860782..cf8419849 100644 --- a/testing/test_full.py +++ b/testing/test_full.py @@ -109,7 +109,9 @@ def test_generic_download_data(): print("00: Downloading the sample data") - urlToDownload = "https://drive.google.com/uc?id=1c4Yrv-jnK6Tk7Ne1HmMTChv-4nYk43NT" + urlToDownload = ( + "https://drive.google.com/uc?id=1c4Yrv-jnK6Tk7Ne1HmMTChv-4nYk43NT" + ) files_check = [ os.path.join(inputDir, "2d_histo_segmentation", "1", "image.tiff"), @@ -155,7 +157,9 @@ def test_generic_constructTrainingCSV(): # else: # continue outputFile = inputDir + "/train_" + application_data + ".csv" - outputFile_rel = inputDir + "/train_" + application_data + "_relative.csv" + outputFile_rel = ( + inputDir + "/train_" + application_data + "_relative.csv" + ) # Test with various combinations of relative/absolute paths # Absolute input/output writeTrainingCSV( @@ -183,7 +187,9 @@ def test_generic_constructTrainingCSV(): with open( inputDir + "/train_" + application_data + ".csv", "r" ) as read_f, open( - inputDir + "/train_" + application_data_regression + ".csv", "w", newline="" + inputDir + "/train_" + application_data_regression + ".csv", + "w", + newline="", ) as write_reg, open( inputDir + "/train_" + application_data_classification + ".csv", "w", @@ -251,7 +257,9 @@ def test_train_segmentation_rad_2d(device): parameters["model"]["onnx_export"] = False parameters["model"]["print_summary"] = False parameters["data_preprocessing"]["resize_image"] = [224, 224] - parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters = populate_header_in_parameters( + parameters, parameters["headers"] + ) # read and initialize parameters for specific data dimension for model in all_models_segmentation: if model == "imagenet_unet": @@ -269,6 +277,7 @@ def test_train_segmentation_rad_2d(device): parameters["nested_training"]["testing"] = -5 parameters["nested_training"]["validation"] = -5 sanitize_outputDir() + print(parameters) TrainingManager( dataframe=training_data, outputDir=outputDir, @@ -301,7 +310,9 @@ def test_train_segmentation_sdnet_rad_2d(device): parameters["model"]["architecture"] = "sdnet" parameters["model"]["onnx_export"] = False parameters["model"]["print_summary"] = False - parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters = populate_header_in_parameters( + parameters, parameters["headers"] + ) sanitize_outputDir() TrainingManager( dataframe=training_data, @@ -335,10 +346,14 @@ def test_train_segmentation_rad_3d(device): parameters["model"]["final_layer"] = "softmax" parameters["model"]["amp"] = True parameters["in_memory"] = True - parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) + parameters["model"]["num_channels"] = len( + parameters["headers"]["channelHeaders"] + ) parameters["model"]["onnx_export"] = False parameters["model"]["print_summary"] = False - parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters = populate_header_in_parameters( + parameters, parameters["headers"] + ) # loop through selected models and train for single epoch for model in all_models_segmentation: if model == "imagenet_unet": @@ -389,11 +404,15 @@ def test_train_regression_rad_2d(device): inputDir + "/train_2d_rad_regression.csv" ) parameters["model"]["num_channels"] = 3 - parameters["model"]["class_list"] = parameters["headers"]["predictionHeaders"] + parameters["model"]["class_list"] = parameters["headers"][ + "predictionHeaders" + ] parameters["scaling_factor"] = 1 parameters["model"]["onnx_export"] = False parameters["model"]["print_summary"] = False - parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters = populate_header_in_parameters( + parameters, parameters["headers"] + ) # loop through selected models and train for single epoch for model in all_models_regression: parameters["model"]["architecture"] = model @@ -430,9 +449,13 @@ def test_train_regression_rad_2d_imagenet(device): inputDir + "/train_2d_rad_regression.csv" ) parameters["model"]["num_channels"] = 3 - parameters["model"]["class_list"] = parameters["headers"]["predictionHeaders"] + parameters["model"]["class_list"] = parameters["headers"][ + "predictionHeaders" + ] parameters["scaling_factor"] = 1 - parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters = populate_header_in_parameters( + parameters, parameters["headers"] + ) # loop through selected models and train for single epoch for model in all_models_classification: parameters["model"]["architecture"] = model @@ -468,13 +491,17 @@ def test_train_regression_brainage_rad_2d(device): inputDir + "/train_2d_rad_regression.csv" ) parameters["model"]["num_channels"] = 3 - parameters["model"]["class_list"] = parameters["headers"]["predictionHeaders"] + parameters["model"]["class_list"] = parameters["headers"][ + "predictionHeaders" + ] parameters["scaling_factor"] = 1 parameters["model"]["architecture"] = "brain_age" parameters["model"]["onnx_export"] = False parameters["model"]["print_summary"] = False parameters_temp = copy.deepcopy(parameters) - parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters = populate_header_in_parameters( + parameters, parameters["headers"] + ) sanitize_outputDir() TrainingManager( dataframe=training_data, @@ -488,7 +515,9 @@ def test_train_regression_brainage_rad_2d(device): # file_config_temp = write_temp_config_path(parameters_temp) model_path = os.path.join(outputDir, "brain_age_best.pth.tar") config_path = os.path.join(outputDir, "parameters.pkl") - optimization_result = post_training_model_optimization(model_path, config_path) + optimization_result = post_training_model_optimization( + model_path, config_path + ) assert optimization_result == False, "Optimization should fail" sanitize_outputDir() @@ -509,11 +538,17 @@ def test_train_regression_rad_3d(device): training_data, parameters["headers"] = parseTrainingCSV( inputDir + "/train_3d_rad_regression.csv" ) - parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) - parameters["model"]["class_list"] = parameters["headers"]["predictionHeaders"] + parameters["model"]["num_channels"] = len( + parameters["headers"]["channelHeaders"] + ) + parameters["model"]["class_list"] = parameters["headers"][ + "predictionHeaders" + ] parameters["model"]["onnx_export"] = False parameters["model"]["print_summary"] = False - parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters = populate_header_in_parameters( + parameters, parameters["headers"] + ) # loop through selected models and train for single epoch for model in all_models_regression: if "efficientnet" in model: @@ -563,7 +598,9 @@ def test_train_classification_rad_2d(device): parameters["model"]["num_channels"] = 3 parameters["model"]["onnx_export"] = False parameters["model"]["print_summary"] = False - parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters = populate_header_in_parameters( + parameters, parameters["headers"] + ) # loop through selected models and train for single epoch for model in all_models_regression: if model == "imagenet_unet": @@ -620,8 +657,12 @@ def test_train_classification_rad_3d(device): training_data, parameters["headers"] = parseTrainingCSV( inputDir + "/train_3d_rad_classification.csv" ) - parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) - parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters["model"]["num_channels"] = len( + parameters["headers"]["channelHeaders"] + ) + parameters = populate_header_in_parameters( + parameters, parameters["headers"] + ) parameters["model"]["onnx_export"] = False parameters["model"]["print_summary"] = False # loop through selected models and train for single epoch @@ -671,8 +712,12 @@ def test_train_resume_inference_classification_rad_3d(device): training_data, parameters["headers"] = parseTrainingCSV( inputDir + "/train_3d_rad_classification.csv" ) - parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) - parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters["model"]["num_channels"] = len( + parameters["headers"]["channelHeaders"] + ) + parameters = populate_header_in_parameters( + parameters, parameters["headers"] + ) # loop through selected models and train for single epoch model = all_models_regression[0] parameters["model"]["architecture"] = model @@ -740,8 +785,12 @@ def test_train_inference_optimize_classification_rad_3d(device): training_data, parameters["headers"] = parseTrainingCSV( inputDir + "/train_3d_rad_classification.csv" ) - parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) - parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters["model"]["num_channels"] = len( + parameters["headers"]["channelHeaders"] + ) + parameters = populate_header_in_parameters( + parameters, parameters["headers"] + ) parameters["model"]["architecture"] = all_models_regression[0] parameters["model"]["onnx_export"] = False parameters["model"]["print_summary"] = False @@ -757,9 +806,13 @@ def test_train_inference_optimize_classification_rad_3d(device): ) # file_config_temp = write_temp_config_path(parameters_temp) - model_path = os.path.join(outputDir, all_models_regression[0] + "_best.pth.tar") + model_path = os.path.join( + outputDir, all_models_regression[0] + "_best.pth.tar" + ) config_path = os.path.join(outputDir, "parameters.pkl") - optimization_result = post_training_model_optimization(model_path, config_path) + optimization_result = post_training_model_optimization( + model_path, config_path + ) assert optimization_result == True, "Optimization should pass" ## testing inference @@ -798,7 +851,9 @@ def test_train_inference_optimize_segmentation_rad_2d(device): parameters["model"]["architecture"] = "resunet" parameters["model"]["onnx_export"] = True parameters["model"]["print_summary"] = False - parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters = populate_header_in_parameters( + parameters, parameters["headers"] + ) sanitize_outputDir() TrainingManager( dataframe=training_data, @@ -826,7 +881,9 @@ def test_train_inference_optimize_segmentation_rad_2d(device): def test_train_inference_classification_with_logits_single_fold_rad_3d(device): - print("15: Starting 3D Rad classification tests for single fold logits inference") + print( + "15: Starting 3D Rad classification tests for single fold logits inference" + ) # read and initialize parameters for specific data dimension parameters = parseConfig( testingDir + "/config_classification.yaml", version_check_flag=False @@ -840,8 +897,12 @@ def test_train_inference_classification_with_logits_single_fold_rad_3d(device): training_data, parameters["headers"] = parseTrainingCSV( inputDir + "/train_3d_rad_classification.csv" ) - parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) - parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters["model"]["num_channels"] = len( + parameters["headers"]["channelHeaders"] + ) + parameters = populate_header_in_parameters( + parameters, parameters["headers"] + ) # loop through selected models and train for single epoch model = all_models_regression[0] parameters["model"]["architecture"] = model @@ -872,8 +933,12 @@ def test_train_inference_classification_with_logits_single_fold_rad_3d(device): parameters["patch_size"] = patch_size["3D"] parameters["model"]["dimension"] = 3 parameters["model"]["final_layer"] = "logits" - parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) - parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters["model"]["num_channels"] = len( + parameters["headers"]["channelHeaders"] + ) + parameters = populate_header_in_parameters( + parameters, parameters["headers"] + ) # loop through selected models and train for single epoch model = all_models_regression[0] parameters["model"]["architecture"] = model @@ -890,8 +955,12 @@ def test_train_inference_classification_with_logits_single_fold_rad_3d(device): print("passed") -def test_train_inference_classification_with_logits_multiple_folds_rad_3d(device): - print("16: Starting 3D Rad classification tests for multi-fold logits inference") +def test_train_inference_classification_with_logits_multiple_folds_rad_3d( + device, +): + print( + "16: Starting 3D Rad classification tests for multi-fold logits inference" + ) # read and initialize parameters for specific data dimension parameters = parseConfig( testingDir + "/config_classification.yaml", version_check_flag=False @@ -908,8 +977,12 @@ def test_train_inference_classification_with_logits_multiple_folds_rad_3d(device training_data, parameters["headers"] = parseTrainingCSV( inputDir + "/train_3d_rad_classification.csv" ) - parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) - parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters["model"]["num_channels"] = len( + parameters["headers"]["channelHeaders"] + ) + parameters = populate_header_in_parameters( + parameters, parameters["headers"] + ) # loop through selected models and train for single epoch model = all_models_regression[0] parameters["model"]["architecture"] = model @@ -941,7 +1014,8 @@ def test_train_scheduler_classification_rad_2d(device): # loop through selected models and train for single epoch for scheduler in global_schedulers_dict: parameters = parseConfig( - testingDir + "/config_classification.yaml", version_check_flag=False + testingDir + "/config_classification.yaml", + version_check_flag=False, ) parameters["modality"] = "rad" parameters["patch_size"] = patch_size["2D"] @@ -953,7 +1027,9 @@ def test_train_scheduler_classification_rad_2d(device): parameters["model"]["num_channels"] = 3 parameters["model"]["architecture"] = "densenet121" parameters["model"]["norm_type"] = "instance" - parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters = populate_header_in_parameters( + parameters, parameters["headers"] + ) parameters["model"]["onnx_export"] = False parameters["model"]["print_summary"] = False parameters["scheduler"] = {} @@ -997,7 +1073,9 @@ def test_train_optimizer_classification_rad_2d(device): parameters["model"]["norm_type"] = "none" parameters["model"]["onnx_export"] = False parameters["model"]["print_summary"] = False - parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters = populate_header_in_parameters( + parameters, parameters["headers"] + ) # loop through selected models and train for single epoch for optimizer in global_optimizer_dict: parameters["optimizer"] = {} @@ -1034,12 +1112,16 @@ def test_clip_train_classification_rad_3d(device): training_data, parameters["headers"] = parseTrainingCSV( inputDir + "/train_3d_rad_classification.csv" ) - parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) + parameters["model"]["num_channels"] = len( + parameters["headers"]["channelHeaders"] + ) parameters["model"]["architecture"] = "vgg16" parameters["model"]["norm_type"] = "None" parameters["model"]["onnx_export"] = False parameters["model"]["print_summary"] = False - parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters = populate_header_in_parameters( + parameters, parameters["headers"] + ) # loop through selected models and train for single epoch for clip_mode in all_clip_modes: parameters["clip_mode"] = clip_mode @@ -1079,15 +1161,21 @@ def test_train_normtype_segmentation_rad_3d(device): parameters["in_memory"] = True parameters["model"]["onnx_export"] = False parameters["model"]["print_summary"] = False - parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) - parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters["model"]["num_channels"] = len( + parameters["headers"]["channelHeaders"] + ) + parameters = populate_header_in_parameters( + parameters, parameters["headers"] + ) # these should raise exceptions for norm_type in ["none", None]: parameters["model"]["norm_type"] = norm_type file_config_temp = write_temp_config_path(parameters) with pytest.raises(Exception) as exc_info: - parameters = parseConfig(file_config_temp, version_check_flag=False) + parameters = parseConfig( + file_config_temp, version_check_flag=False + ) print("Exception raised:", exc_info.value) @@ -1150,7 +1238,9 @@ def test_train_metrics_segmentation_rad_2d(device): training_data, parameters["headers"] = parseTrainingCSV( inputDir + "/train_2d_rad_segmentation.csv" ) - parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters = populate_header_in_parameters( + parameters, parameters["headers"] + ) sanitize_outputDir() TrainingManager( dataframe=training_data, @@ -1185,7 +1275,9 @@ def test_train_metrics_regression_rad_2d(device): parameters["model"]["architecture"] = "vgg11" parameters["model"]["onnx_export"] = False parameters["model"]["print_summary"] = True - parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters = populate_header_in_parameters( + parameters, parameters["headers"] + ) sanitize_outputDir() TrainingManager( dataframe=training_data, @@ -1229,7 +1321,9 @@ def get_parameters_after_alteration(loss_type: str) -> dict: parameters["metrics"] = ["dice"] parameters["model"]["onnx_export"] = False parameters["model"]["print_summary"] = False - parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters = populate_header_in_parameters( + parameters, parameters["headers"] + ) return parameters, training_data # loop through selected models and train for single epoch @@ -1277,7 +1371,9 @@ def test_generic_config_read(): inputDir + "/train_2d_rad_segmentation.csv" ) assert parameters is not None, "parameters is None" - data_loader = ImagesFromDataFrame(training_data, parameters, True, "unit_test") + data_loader = ImagesFromDataFrame( + training_data, parameters, True, "unit_test" + ) assert data_loader is not None, "data_loader is None" os.remove(file_config_temp) @@ -1297,7 +1393,9 @@ def test_generic_config_read(): inputDir + "/train_2d_rad_segmentation.csv" ) assert parameters is not None, "parameters is None" - data_loader = ImagesFromDataFrame(training_data, parameters, True, "unit_test") + data_loader = ImagesFromDataFrame( + training_data, parameters, True, "unit_test" + ) assert data_loader is not None, "data_loader is None" os.remove(file_config_temp) @@ -1315,7 +1413,9 @@ def test_generic_config_read(): inputDir + "/train_2d_rad_segmentation.csv" ) assert parameters is not None, "parameters is None" - data_loader = ImagesFromDataFrame(training_data, parameters, True, "unit_test") + data_loader = ImagesFromDataFrame( + training_data, parameters, True, "unit_test" + ) assert data_loader is not None, "data_loader is None" os.remove(file_config_temp) @@ -1333,7 +1433,9 @@ def test_generic_config_read(): inputDir + "/train_2d_rad_segmentation.csv" ) assert parameters is not None, "parameters is None" - data_loader = ImagesFromDataFrame(training_data, parameters, True, "unit_test") + data_loader = ImagesFromDataFrame( + training_data, parameters, True, "unit_test" + ) assert data_loader is not None, "data_loader is None" os.remove(file_config_temp) @@ -1352,7 +1454,9 @@ def test_generic_cli_function_preprocess(): input_data_df, _ = parseTrainingCSV(file_data, train=False) # add random metadata to ensure it gets preserved input_data_df["metadata_test_string"] = input_data_df.shape[0] * ["test"] - input_data_df["metadata_test_float"] = np.random.rand(input_data_df.shape[0]) + input_data_df["metadata_test_float"] = np.random.rand( + input_data_df.shape[0] + ) input_data_df["metadata_test_int"] = np.random.randint( 0, 100, input_data_df.shape[0] ) @@ -1410,7 +1514,9 @@ def test_generic_cli_function_preprocess(): input_data_df, _ = parseTrainingCSV(file_data, train=False) # add random metadata to ensure it gets preserved input_data_df["metadata_test_string"] = input_data_df.shape[0] * ["test"] - input_data_df["metadata_test_float"] = np.random.rand(input_data_df.shape[0]) + input_data_df["metadata_test_float"] = np.random.rand( + input_data_df.shape[0] + ) input_data_df["metadata_test_int"] = np.random.randint( 0, 100, input_data_df.shape[0] ) @@ -1462,7 +1568,13 @@ def test_generic_cli_function_mainrun(device): file_data = os.path.join(inputDir, "train_2d_rad_segmentation.csv") main_run( - file_data, file_config_temp, outputDir, True, device, resume=False, reset=True + file_data, + file_config_temp, + outputDir, + True, + device, + resume=False, + reset=True, ) sanitize_outputDir() @@ -1532,7 +1644,9 @@ def test_dataloader_construction_train_segmentation_3d(device): parameters["model"]["dimension"] = 3 parameters["model"]["class_list"] = [0, 1] parameters["model"]["amp"] = True - parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) + parameters["model"]["num_channels"] = len( + parameters["headers"]["channelHeaders"] + ) parameters["model"]["architecture"] = "unet" parameters["weighted_loss"] = False parameters["model"]["onnx_export"] = False @@ -1540,7 +1654,9 @@ def test_dataloader_construction_train_segmentation_3d(device): parameters["data_postprocessing"]["mapping"] = {0: 0, 1: 1} parameters["data_postprocessing"]["fill_holes"] = True parameters["data_postprocessing"]["cca"] = True - parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters = populate_header_in_parameters( + parameters, parameters["headers"] + ) # loop through selected models and train for single epoch sanitize_outputDir() TrainingManager( @@ -1564,25 +1680,37 @@ def test_generic_preprocess_functions(): input_tensor = torch.rand(4, 256, 256, 1) input_transformed = global_preprocessing_dict["rgba2rgb"]()(input_tensor) assert input_transformed.shape[0] == 3, "Number of channels is not 3" - assert input_transformed.shape[1:] == input_tensor.shape[1:], "Shape mismatch" + assert ( + input_transformed.shape[1:] == input_tensor.shape[1:] + ), "Shape mismatch" input_tensor = torch.rand(3, 256, 256, 1) input_transformed = global_preprocessing_dict["rgb2rgba"]()(input_tensor) assert input_transformed.shape[0] == 4, "Number of channels is not 4" - assert input_transformed.shape[1:] == input_tensor.shape[1:], "Shape mismatch" + assert ( + input_transformed.shape[1:] == input_tensor.shape[1:] + ), "Shape mismatch" input_tensor = 2 * torch.rand(3, 256, 256, 1) - 1 - input_transformed = global_preprocessing_dict["normalize_div_by_255"](input_tensor) + input_transformed = global_preprocessing_dict["normalize_div_by_255"]( + input_tensor + ) input_tensor = 2 * torch.rand(1, 3, 256, 256) - 1 - input_transformed = global_preprocessing_dict["normalize_imagenet"](input_tensor) - input_transformed = global_preprocessing_dict["normalize_standardize"](input_tensor) - input_transformed = global_preprocessing_dict["normalize_div_by_255"](input_tensor) + input_transformed = global_preprocessing_dict["normalize_imagenet"]( + input_tensor + ) + input_transformed = global_preprocessing_dict["normalize_standardize"]( + input_tensor + ) + input_transformed = global_preprocessing_dict["normalize_div_by_255"]( + input_tensor + ) parameters_dict = {} parameters_dict["min"] = 0.25 parameters_dict["max"] = 0.75 - input_transformed = global_preprocessing_dict["threshold"](parameters_dict)( - input_tensor - ) + input_transformed = global_preprocessing_dict["threshold"]( + parameters_dict + )(input_tensor) assert ( torch.count_nonzero( input_transformed[input_transformed < parameters_dict["min"]] @@ -1591,7 +1719,9 @@ def test_generic_preprocess_functions(): == 0 ), "Input should be thresholded" - input_transformed = global_preprocessing_dict["clip"](parameters_dict)(input_tensor) + input_transformed = global_preprocessing_dict["clip"](parameters_dict)( + input_tensor + ) assert ( torch.count_nonzero( input_transformed[input_transformed < parameters_dict["min"]] @@ -1609,7 +1739,9 @@ def test_generic_preprocess_functions(): ## stain_normalization checks input_tensor = 2 * torch.rand(3, 256, 256, 1) + 10 - training_data, _ = parseTrainingCSV(inputDir + "/train_2d_rad_segmentation.csv") + training_data, _ = parseTrainingCSV( + inputDir + "/train_2d_rad_segmentation.csv" + ) parameters_temp = {} parameters_temp["data_preprocessing"] = {} parameters_temp["data_preprocessing"]["stain_normalizer"] = { @@ -1637,13 +1769,17 @@ def test_generic_preprocess_functions(): # adaptive histogram equalization parameters_temp = {} parameters_temp["data_preprocessing"] = {} - parameters_temp["data_preprocessing"]["histogram_matching"] = {"target": "adaptive"} + parameters_temp["data_preprocessing"]["histogram_matching"] = { + "target": "adaptive" + } non_zero_normalizer = global_preprocessing_dict["histogram_matching"]( parameters_temp["data_preprocessing"]["histogram_matching"] ) input_transformed = non_zero_normalizer(input_tensor) # histogram matching - training_data, _ = parseTrainingCSV(inputDir + "/train_3d_rad_segmentation.csv") + training_data, _ = parseTrainingCSV( + inputDir + "/train_3d_rad_segmentation.csv" + ) parameters_temp = {} parameters_temp["data_preprocessing"] = {} parameters_temp["data_preprocessing"]["histogram_matching"] = { @@ -1677,11 +1813,21 @@ def test_generic_preprocess_functions(): cropper = global_preprocessing_dict["crop"]([64, 64, 64]) input_transformed = cropper(input_tensor) - assert input_transformed.shape == (1, 128, 128, 128), "Cropping should work" + assert input_transformed.shape == ( + 1, + 128, + 128, + 128, + ), "Cropping should work" cropper = global_preprocessing_dict["centercrop"]([128, 128, 128]) input_transformed = cropper(input_tensor) - assert input_transformed.shape == (1, 128, 128, 128), "Center-crop should work" + assert input_transformed.shape == ( + 1, + 128, + 128, + 128, + ), "Center-crop should work" # test pure morphological operations input_tensor_3d = torch.rand(1, 1, 256, 256, 256) @@ -1702,12 +1848,18 @@ def test_generic_preprocess_functions(): # test obtaining arrays input_tensor_3d = torch.rand(256, 256, 256) input_array = get_array_from_image_or_tensor(input_tensor_3d) - assert isinstance(input_array, np.ndarray), "Array should be obtained from tensor" + assert isinstance( + input_array, np.ndarray + ), "Array should be obtained from tensor" input_image = sitk.GetImageFromArray(input_array) input_array = get_array_from_image_or_tensor(input_image) - assert isinstance(input_array, np.ndarray), "Array should be obtained from image" + assert isinstance( + input_array, np.ndarray + ), "Array should be obtained from image" input_array = get_array_from_image_or_tensor(input_array) - assert isinstance(input_array, np.ndarray), "Array should be obtained from array" + assert isinstance( + input_array, np.ndarray + ), "Array should be obtained from array" with pytest.raises(Exception) as exc_info: input_list = [0, 1] @@ -1777,7 +1929,9 @@ def test_generic_preprocess_functions(): expected_output = [10, 10] output_size_dict = {"resize": expected_output} input_transformed = resize_image(input_image, output_size_dict) - assert list(input_transformed.GetSize()) == expected_output, "Resize should work" + assert ( + list(input_transformed.GetSize()) == expected_output + ), "Resize should work" sanitize_outputDir() @@ -1834,7 +1988,9 @@ def test_generic_augmentation_functions(): default_range = [-0.1, 0.1] for key in ranges: - params["data_augmentation"]["hed_transform"].setdefault(key, default_range) + params["data_augmentation"]["hed_transform"].setdefault( + key, default_range + ) params["data_augmentation"]["hed_transform"].setdefault( "cutoff_range", [0.05, 0.95] @@ -1864,16 +2020,26 @@ def test_generic_augmentation_functions(): output_tensor = None if aug_lower in global_augs_dict: output_tensor = global_augs_dict[aug]( - params_all_preprocessing_and_augs["data_augmentation"][aug_lower] + params_all_preprocessing_and_augs["data_augmentation"][ + aug_lower + ] )(input_tensor) assert output_tensor != None, "Augmentation should work" # additional test for elastic - params_elastic = params_all_preprocessing_and_augs["data_augmentation"]["elastic"] - for key_to_pop in ["num_control_points", "max_displacement", "locked_borders"]: + params_elastic = params_all_preprocessing_and_augs["data_augmentation"][ + "elastic" + ] + for key_to_pop in [ + "num_control_points", + "max_displacement", + "locked_borders", + ]: params_elastic.pop(key_to_pop, None) output_tensor = global_augs_dict["elastic"](params_elastic)(input_tensor) - assert output_tensor != None, "Augmentation for base elastic transform should work" + assert ( + output_tensor != None + ), "Augmentation for base elastic transform should work" sanitize_outputDir() @@ -1916,7 +2082,9 @@ def test_train_checkpointing_segmentation_rad_2d(device): parameters["model"]["architecture"] = "unet" parameters["model"]["onnx_export"] = False parameters["model"]["print_summary"] = False - parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters = populate_header_in_parameters( + parameters, parameters["headers"] + ) sanitize_outputDir() TrainingManager( dataframe=training_data, @@ -1961,18 +2129,24 @@ def test_generic_model_patch_divisibility(): parameters["model"]["print_summary"] = False parameters["model"]["num_channels"] = 3 parameters["metrics"] = ["dice"] - parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters = populate_header_in_parameters( + parameters, parameters["headers"] + ) # this assertion should fail with pytest.raises(BaseException) as _: - global_models_dict[parameters["model"]["architecture"]](parameters=parameters) + global_models_dict[parameters["model"]["architecture"]]( + parameters=parameters + ) parameters["model"]["architecture"] = "uinc" parameters["model"]["base_filters"] = 11 # this assertion should fail with pytest.raises(BaseException) as _: - global_models_dict[parameters["model"]["architecture"]](parameters=parameters) + global_models_dict[parameters["model"]["architecture"]]( + parameters=parameters + ) sanitize_outputDir() @@ -2008,7 +2182,8 @@ def test_generic_one_hot_logic(): # check combined foreground combined_array = np.logical_or( - np.logical_or((random_array == 1), (random_array == 2)), (random_array == 3) + np.logical_or((random_array == 1), (random_array == 2)), + (random_array == 3), ) comparison = combined_array == (img_tensor_oh_rev_array == 1) assert comparison.all(), "Arrays at the combined foreground are not equal" @@ -2029,8 +2204,12 @@ def test_generic_one_hot_logic(): ) for key, value in parameters["data_postprocessing"]["mapping"].items(): - comparison = (img_tensor_oh_rev_array == key) == (mapped_output == value) - assert comparison.all(), "Arrays at {}:{} are not equal".format(key, value) + comparison = (img_tensor_oh_rev_array == key) == ( + mapped_output == value + ) + assert comparison.all(), "Arrays at {}:{} are not equal".format( + key, value + ) # check the case where 0 is present as an int in a special case class_list = [0, "1||2||3", np.max(random_array)] @@ -2054,7 +2233,8 @@ def test_generic_one_hot_logic(): # check combined foreground combined_array = np.logical_or( - np.logical_or((random_array == 1), (random_array == 2)), (random_array == 3) + np.logical_or((random_array == 1), (random_array == 2)), + (random_array == 3), ) comparison = combined_array == (img_tensor_oh_rev_array == 1) assert comparison.all(), "Arrays at the combined foreground are not equal" @@ -2080,7 +2260,9 @@ def test_generic_anonymizer(): assert os.path.exists(output_file), "Anonymized file does not exist" # test nifti conversion - config_file_for_nifti = os.path.join(outputDir, "config_anonymizer_nifti.yaml") + config_file_for_nifti = os.path.join( + outputDir, "config_anonymizer_nifti.yaml" + ) with open(config_file, "r") as file_data: yaml_data = file_data.read() parameters = yaml.safe_load(yaml_data) @@ -2091,22 +2273,32 @@ def test_generic_anonymizer(): # for nifti conversion, the input needs to be in a dir input_folder_for_nifti = os.path.join(outputDir, "nifti_input") Path(input_folder_for_nifti).mkdir(parents=True, exist_ok=True) - shutil.copyfile(input_file, os.path.join(input_folder_for_nifti, "MR_small.dcm")) + shutil.copyfile( + input_file, os.path.join(input_folder_for_nifti, "MR_small.dcm") + ) output_file = os.path.join(outputDir, "MR_small.nii.gz") - run_anonymizer(input_folder_for_nifti, output_file, config_file_for_nifti, "rad") + run_anonymizer( + input_folder_for_nifti, output_file, config_file_for_nifti, "rad" + ) assert os.path.exists(output_file), "Anonymized file does not exist" if not os.path.exists(output_file): raise Exception("Output NIfTI file was not created") - input_file = os.path.join(inputDir, "2d_histo_segmentation", "1", "image.tiff") + input_file = os.path.join( + inputDir, "2d_histo_segmentation", "1", "image.tiff" + ) output_file_histo = os.path.join(outputDir, "histo_anon.tiff") # this assertion should fail since histo anonymizer is not implementer with pytest.raises(BaseException) as exc_info: - run_anonymizer(input_folder_for_nifti, output_file_histo, None, "histo") - assert os.path.exists(output_file_histo), "Anonymized file does not exist" + run_anonymizer( + input_folder_for_nifti, output_file_histo, None, "histo" + ) + assert os.path.exists( + output_file_histo + ), "Anonymized file does not exist" print("Exception raised: ", exc_info.value) sanitize_outputDir() @@ -2121,7 +2313,9 @@ def test_train_inference_segmentation_histology_2d(device): if os.path.isdir(output_dir_patches): shutil.rmtree(output_dir_patches) Path(output_dir_patches).mkdir(parents=True, exist_ok=True) - output_dir_patches_output = os.path.join(output_dir_patches, "histo_patches_output") + output_dir_patches_output = os.path.join( + output_dir_patches, "histo_patches_output" + ) Path(output_dir_patches_output).mkdir(parents=True, exist_ok=True) parameters_patch = {} @@ -2139,7 +2333,9 @@ def test_train_inference_segmentation_histology_2d(device): file_config_temp, ) - file_for_Training = os.path.join(output_dir_patches_output, "opm_train.csv") + file_for_Training = os.path.join( + output_dir_patches_output, "opm_train.csv" + ) # read and parse csv parameters = parseConfig( testingDir + "/config_segmentation.yaml", version_check_flag=False @@ -2151,7 +2347,9 @@ def test_train_inference_segmentation_histology_2d(device): parameters["model"]["class_list"] = [0, 255] parameters["model"]["amp"] = True parameters["model"]["num_channels"] = 3 - parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters = populate_header_in_parameters( + parameters, parameters["headers"] + ) parameters["model"]["architecture"] = "resunet" parameters["nested_training"]["testing"] = 1 parameters["nested_training"]["validation"] = -2 @@ -2195,7 +2393,9 @@ def test_train_inference_classification_histology_large_2d(device): if os.path.isdir(output_dir_patches): shutil.rmtree(output_dir_patches) Path(output_dir_patches).mkdir(parents=True, exist_ok=True) - output_dir_patches_output = os.path.join(output_dir_patches, "histo_patches_output") + output_dir_patches_output = os.path.join( + output_dir_patches, "histo_patches_output" + ) Path(output_dir_patches_output).mkdir(parents=True, exist_ok=True) for sub in ["1", "2"]: @@ -2247,7 +2447,12 @@ def resize_for_ci(filename, scale): print("Trying vips:", ex1) try: os.system( - "vips resize " + filename + " " + new_filename + " " + str(scale) + "vips resize " + + filename + + " " + + new_filename + + " " + + str(scale) ) except Exception as ex2: print("Resize could not be done:", ex2) @@ -2255,7 +2460,9 @@ def resize_for_ci(filename, scale): for _, row in input_df.iterrows(): # ensure opm mask size check is triggered - _, _ = generate_initial_mask(resize_for_ci(row["Channel_0"], scale=2), 1) + _, _ = generate_initial_mask( + resize_for_ci(row["Channel_0"], scale=2), 1 + ) for patch_size in [ [128, 128], @@ -2281,7 +2488,9 @@ def resize_for_ci(filename, scale): input_df.to_csv(resized_inference_data_list, index=False) files_to_delete.append(resized_inference_data_list) - file_for_Training = os.path.join(output_dir_patches_output, "opm_train.csv") + file_for_Training = os.path.join( + output_dir_patches_output, "opm_train.csv" + ) temp_df = pd.read_csv(file_for_Training) temp_df.drop("Label", axis=1, inplace=True) temp_df["valuetopredict"] = np.random.randint(2, size=len(temp_df)) @@ -2301,7 +2510,9 @@ def resize_for_ci(filename, scale): parameters["model"]["architecture"] = "densenet121" parameters["model"]["norm_type"] = "none" parameters["data_preprocessing"]["rgba2rgb"] = "" - parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters = populate_header_in_parameters( + parameters, parameters["headers"] + ) parameters["nested_training"]["testing"] = 1 parameters["nested_training"]["validation"] = -2 parameters["model"]["print_summary"] = False @@ -2318,7 +2529,9 @@ def resize_for_ci(filename, scale): reset=True, ) parameters["output_dir"] = modelDir # this is in inference mode - parameters["data_preprocessing"]["resize_patch"] = parameters_patch["patch_size"] + parameters["data_preprocessing"]["resize_patch"] = parameters_patch[ + "patch_size" + ] parameters["patch_size"] = [ parameters_patch["patch_size"][0] * 10, parameters_patch["patch_size"][1] * 10, @@ -2368,7 +2581,9 @@ def test_train_inference_classification_histology_2d(device): if os.path.isdir(output_dir_patches): shutil.rmtree(output_dir_patches) Path(output_dir_patches).mkdir(parents=True, exist_ok=True) - output_dir_patches_output = os.path.join(output_dir_patches, "histo_patches_output") + output_dir_patches_output = os.path.join( + output_dir_patches, "histo_patches_output" + ) parameters_patch = {} # extracting minimal number of patches to ensure that the test does not take too long @@ -2388,7 +2603,9 @@ def test_train_inference_classification_histology_2d(device): file_config_temp, ) - file_for_Training = os.path.join(output_dir_patches_output, "opm_train.csv") + file_for_Training = os.path.join( + output_dir_patches_output, "opm_train.csv" + ) temp_df = pd.read_csv(file_for_Training) temp_df.drop("Label", axis=1, inplace=True) temp_df["valuetopredict"] = np.random.randint(2, size=6) @@ -2408,7 +2625,9 @@ def test_train_inference_classification_histology_2d(device): parameters["model"]["architecture"] = "densenet121" parameters["model"]["norm_type"] = "none" parameters["data_preprocessing"]["rgba2rgb"] = "" - parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters = populate_header_in_parameters( + parameters, parameters["headers"] + ) parameters["nested_training"]["testing"] = 1 parameters["nested_training"]["validation"] = -2 parameters["model"]["print_summary"] = False @@ -2476,7 +2695,9 @@ def test_train_segmentation_unet_layerchange_rad_2d(device): parameters["model"]["amp"] = True parameters["model"]["print_summary"] = False parameters["model"]["num_channels"] = 3 - parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters = populate_header_in_parameters( + parameters, parameters["headers"] + ) # loop through selected models and train for single epoch parameters["model"]["norm_type"] = "batch" parameters["nested_training"]["testing"] = -5 @@ -2514,18 +2735,24 @@ def test_train_segmentation_unetr_rad_3d(device): # this assertion should fail with pytest.raises(BaseException) as _: - global_models_dict[parameters["model"]["architecture"]](parameters=parameters) + global_models_dict[parameters["model"]["architecture"]]( + parameters=parameters + ) parameters["model"]["dimension"] = 3 parameters["patch_size"] = [32, 32, 32] with pytest.raises(BaseException) as _: parameters["model"]["inner_patch_size"] = 19 - global_models_dict[parameters["model"]["architecture"]](parameters=parameters) + global_models_dict[parameters["model"]["architecture"]]( + parameters=parameters + ) with pytest.raises(BaseException) as _: parameters["model"]["inner_patch_size"] = 64 - global_models_dict[parameters["model"]["architecture"]](parameters=parameters) + global_models_dict[parameters["model"]["architecture"]]( + parameters=parameters + ) for patch in [16, 8]: parameters["model"]["inner_patch_size"] = patch @@ -2534,7 +2761,9 @@ def test_train_segmentation_unetr_rad_3d(device): parameters["model"]["num_channels"] = len( parameters["headers"]["channelHeaders"] ) - parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters = populate_header_in_parameters( + parameters, parameters["headers"] + ) # loop through selected models and train for single epoch parameters["model"]["norm_type"] = "batch" parameters["nested_training"]["testing"] = -5 @@ -2574,7 +2803,9 @@ def test_train_segmentation_unetr_rad_2d(device): parameters["model"]["amp"] = True parameters["model"]["print_summary"] = False parameters["model"]["num_channels"] = 3 - parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters = populate_header_in_parameters( + parameters, parameters["headers"] + ) # loop through selected models and train for single epoch parameters["model"]["norm_type"] = "batch" parameters["nested_training"]["testing"] = -5 @@ -2612,12 +2843,16 @@ def test_train_segmentation_transunet_rad_2d(device): with pytest.raises(BaseException) as _: parameters["model"]["num_heads"] = 6 parameters["model"]["embed_dim"] = 64 - global_models_dict[parameters["model"]["architecture"]](parameters=parameters) + global_models_dict[parameters["model"]["architecture"]]( + parameters=parameters + ) with pytest.raises(BaseException) as _: parameters["model"]["num_heads"] = 3 parameters["model"]["embed_dim"] = 50 - global_models_dict[parameters["model"]["architecture"]](parameters=parameters) + global_models_dict[parameters["model"]["architecture"]]( + parameters=parameters + ) parameters["model"]["embed_dim"] = 64 parameters["model"]["depth"] = 2 @@ -2625,7 +2860,9 @@ def test_train_segmentation_transunet_rad_2d(device): parameters["model"]["num_heads"] = 8 parameters["model"]["amp"] = True parameters["model"]["num_channels"] = 3 - parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters = populate_header_in_parameters( + parameters, parameters["headers"] + ) # loop through selected models and train for single epoch parameters["model"]["norm_type"] = "batch" parameters["nested_training"]["testing"] = -5 @@ -2662,32 +2899,44 @@ def test_train_segmentation_transunet_rad_3d(device): # this assertion should fail with pytest.raises(BaseException) as _: - global_models_dict[parameters["model"]["architecture"]](parameters=parameters) + global_models_dict[parameters["model"]["architecture"]]( + parameters=parameters + ) parameters["model"]["dimension"] = 3 parameters["patch_size"] = [32, 32, 32] with pytest.raises(BaseException) as _: parameters["model"]["depth"] = 1 - global_models_dict[parameters["model"]["architecture"]](parameters=parameters) + global_models_dict[parameters["model"]["architecture"]]( + parameters=parameters + ) with pytest.raises(BaseException) as _: parameters["model"]["num_heads"] = 6 parameters["model"]["embed_dim"] = 64 - global_models_dict[parameters["model"]["architecture"]](parameters=parameters) + global_models_dict[parameters["model"]["architecture"]]( + parameters=parameters + ) with pytest.raises(BaseException) as _: parameters["model"]["num_heads"] = 3 parameters["model"]["embed_dim"] = 50 - global_models_dict[parameters["model"]["architecture"]](parameters=parameters) + global_models_dict[parameters["model"]["architecture"]]( + parameters=parameters + ) parameters["model"]["num_heads"] = 8 parameters["model"]["embed_dim"] = 64 parameters["model"]["depth"] = 2 parameters["model"]["class_list"] = [0, 255] parameters["model"]["amp"] = True - parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) - parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters["model"]["num_channels"] = len( + parameters["headers"]["channelHeaders"] + ) + parameters = populate_header_in_parameters( + parameters, parameters["headers"] + ) # loop through selected models and train for single epoch parameters["model"]["norm_type"] = "batch" parameters["nested_training"]["testing"] = -5 @@ -2726,7 +2975,9 @@ def test_train_gradient_clipping_classification_rad_2d(device): parameters["model"]["num_channels"] = 3 parameters["model"]["onnx_export"] = False parameters["model"]["print_summary"] = False - parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters = populate_header_in_parameters( + parameters, parameters["headers"] + ) # ensure gradient clipping is getting tested for clip_mode in ["norm", "value", "agc"]: parameters["model"]["architecture"] = "imagenet_vgg11" @@ -2749,7 +3000,9 @@ def test_train_gradient_clipping_classification_rad_2d(device): def test_train_segmentation_unet_conversion_rad_3d(device): - print("43: Starting 3D Rad segmentation tests for unet with ACS conversion") + print( + "43: Starting 3D Rad segmentation tests for unet with ACS conversion" + ) # read and parse csv # read and initialize parameters for specific data dimension parameters = parseConfig( @@ -2766,10 +3019,14 @@ def test_train_segmentation_unet_conversion_rad_3d(device): parameters["model"]["amp"] = True parameters["in_memory"] = True parameters["verbose"] = False - parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) + parameters["model"]["num_channels"] = len( + parameters["headers"]["channelHeaders"] + ) parameters["model"]["onnx_export"] = False parameters["model"]["print_summary"] = False - parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters = populate_header_in_parameters( + parameters, parameters["headers"] + ) # loop through selected models and train for single epoch for model in ["unet", "unet_multilayer", "lightunet_multilayer"]: for converter_type in ["acs", "soft", "conv3d"]: @@ -2809,7 +3066,9 @@ def test_generic_cli_function_configgenerator(): parameters = parseConfig( os.path.join(outputDir, file), version_check_flag=False ) - assert parameters, "config generator did not generate valid config files" + assert ( + parameters + ), "config generator did not generate valid config files" sanitize_outputDir() generator_config = yaml.safe_load(open(generator_config_path, "r")) @@ -2851,7 +3110,9 @@ def test_generic_cli_function_recoverconfig(): parameters["model"]["architecture"] = "sdnet" parameters["model"]["onnx_export"] = False parameters["model"]["print_summary"] = False - parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters = populate_header_in_parameters( + parameters, parameters["headers"] + ) sanitize_outputDir() TrainingManager( dataframe=training_data, @@ -2899,7 +3160,9 @@ def test_generic_deploy_docker(): parameters["data_preprocessing"]["resize_image"] = [224, 224] parameters["memory_save_mode"] = True - parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters = populate_header_in_parameters( + parameters, parameters["headers"] + ) sanitize_outputDir() TrainingManager( dataframe=training_data, @@ -2936,7 +3199,9 @@ def test_generic_deploy_docker(): def test_collision_subjectid_test_segmentation_rad_2d(device): - print("47: Starting 2D Rad segmentation tests for collision of subjectID in test") + print( + "47: Starting 2D Rad segmentation tests for collision of subjectID in test" + ) parameters = parseConfig( testingDir + "/config_segmentation.yaml", version_check_flag=False ) @@ -2999,10 +3264,14 @@ def test_generic_random_numbers_are_deterministic_on_cpu(): assert np.allclose(a, c) assert np.allclose(b, d) - e, f = [random.random() for _ in range(5)], [random.random() for _ in range(5)] + e, f = [random.random() for _ in range(5)], [ + random.random() for _ in range(5) + ] set_determinism(seed=42) - g, h = [random.random() for _ in range(5)], [random.random() for _ in range(5)] + g, h = [random.random() for _ in range(5)], [ + random.random() for _ in range(5) + ] # Check that the generated random numbers are the same with Python's built-in random module assert e == g @@ -3065,7 +3334,9 @@ def test_generic_cli_function_metrics_cli_rad_nd(): # run the metrics calculation generate_metrics_dict(temp_infer_csv, temp_config, output_file) - assert os.path.isfile(output_file), "Metrics output file was not generated" + assert os.path.isfile( + output_file + ), "Metrics output file was not generated" sanitize_outputDir() From baf5153d078ddaabec4bb9857a6adb61bcaad78a Mon Sep 17 00:00:00 2001 From: Szymon Date: Sun, 19 Nov 2023 09:38:05 +0100 Subject: [PATCH 3/3] Hotfix for test_full.py script --- testing/test_full.py | 1 - 1 file changed, 1 deletion(-) diff --git a/testing/test_full.py b/testing/test_full.py index cf8419849..95e0d3845 100644 --- a/testing/test_full.py +++ b/testing/test_full.py @@ -277,7 +277,6 @@ def test_train_segmentation_rad_2d(device): parameters["nested_training"]["testing"] = -5 parameters["nested_training"]["validation"] = -5 sanitize_outputDir() - print(parameters) TrainingManager( dataframe=training_data, outputDir=outputDir,