From d90aa7f62a414afdadf1f42f137968bad48e13c9 Mon Sep 17 00:00:00 2001
From: Joost van Griethuysen
Date: Thu, 31 Jan 2019 09:45:10 +0000
Subject: [PATCH 1/4] ENH: Add CLI for voxel-based extraction

Add a new switch in PyRadiomics commandline entrypoint: `--mode`, `-m`, which
takes either `segment` (default) or `voxel`.

Add a new switch in PyRadiomics commandline entrypoint: `--out-dir`, `-od`,
which specifies a directory (created if not exists) where featuremaps (mode
`voxel`) or intermediate files (mode `segment`) are stored. If not specified,
featuremaps are stored in the current working directory and intermediate
files are not stored.

Refactor the commandline parsing and running of PyRadiomics.

Allow adding diagnostic info to the output when extracting voxel-based
features.

After extracting features in a case, results are split into featuremaps,
which are stored as SimpleITK images and non-features, which are processed as
the output of segment-based extraction.
---
 radiomics/featureextractor.py |   8 +-
 radiomics/scripts/__init__.py | 803 ++++++++++++++++++----------------
 radiomics/scripts/segment.py  | 131 ++----
 radiomics/scripts/voxel.py    | 125 ++++++
 4 files changed, 596 insertions(+), 471 deletions(-)
 create mode 100644 radiomics/scripts/voxel.py

diff --git a/radiomics/featureextractor.py b/radiomics/featureextractor.py
index 1195d548..694a9225 100644
--- a/radiomics/featureextractor.py
+++ b/radiomics/featureextractor.py
@@ -422,11 +422,11 @@ def execute(self, imageFilepath, maskFilepath, label=None, voxelBased=False):
     if resegmentShape and resegmentedMask is not None:
       mask = resegmentedMask
 
-    if not voxelBased:
-      # 3. Add the additional information if enabled
-      if self.generalInfo is not None:
-        featureVector.update(self.generalInfo.getGeneralInfo())
+    # 3. Add the additional information if enabled
+    if self.generalInfo is not None:
+      featureVector.update(self.generalInfo.getGeneralInfo())
 
+    if not voxelBased:
       # 4. 
If shape descriptors should be calculated, handle it separately here if 'shape' in self._enabledFeatures.keys(): featureVector.update(self.computeShape(image, mask, boundingBox)) diff --git a/radiomics/scripts/__init__.py b/radiomics/scripts/__init__.py index 2e1de82d..547eda24 100644 --- a/radiomics/scripts/__init__.py +++ b/radiomics/scripts/__init__.py @@ -2,6 +2,7 @@ import argparse import csv from functools import partial +import json import logging.config import logging.handlers from multiprocessing import cpu_count, Manager, Pool @@ -9,396 +10,458 @@ import sys import threading +import numpy from pykwalify.compat import yaml import pykwalify.core import six.moves import radiomics -from . import segment - - -scriptlogger = logging.getLogger('radiomics.script') # holds logger for script events -relative_path_start = os.getcwd() - - -def parse_args(custom_arguments=None): - global relative_path_start - parser = argparse.ArgumentParser(usage='%(prog)s image|batch [mask] [Options]', - formatter_class=argparse.RawTextHelpFormatter) - - inputGroup = parser.add_argument_group(title='Input', - description='Input files and arguments defining the extraction:\n' - '- image and mask files (single mode) ' - 'or CSV-file specifying them (batch mode)\n' - '- Parameter file (.yml/.yaml or .json)\n' - '- Overrides for customization type 3 ("settings")\n' - '- Multi-threaded batch processing') - inputGroup.add_argument('input', metavar='{Image,Batch}FILE', - help='Image file (single mode) or CSV batch file (batch mode)') - inputGroup.add_argument('mask', nargs='?', metavar='MaskFILE', default=None, - help='Mask file identifying the ROI in the Image. 
\n' - 'Only required when in single mode, ignored otherwise.') - inputGroup.add_argument('--param', '-p', metavar='FILE', default=None, - help='Parameter file containing the settings to be used in extraction') - inputGroup.add_argument('--setting', '-s', metavar='"SETTING_NAME:VALUE"', action='append', default=[], type=str, - help='Additional parameters which will override those in the\n' - 'parameter file and/or the default settings. Multiple\n' - 'settings possible. N.B. Only works for customization\n' - 'type 3 ("setting").') - inputGroup.add_argument('--jobs', '-j', metavar='N', type=int, default=1, choices=six.moves.range(1, cpu_count() + 1), - help='(Batch mode only) Specifies the number of threads to use for\n' - 'parallel processing. This is applied at the case level;\n' - 'i.e. 1 thread per case. Actual number of workers used is\n' - 'min(cases, jobs).') - inputGroup.add_argument('--validate', action='store_true', - help='If specified, check if input is valid and check if file locations point to exisiting ' - 'files') - - outputGroup = parser.add_argument_group(title='Output', description='Arguments controlling output redirection and ' - 'the formatting of calculated results.') - outputGroup.add_argument('--out', '-o', metavar='FILE', type=argparse.FileType('a'), default=sys.stdout, - help='File to append output to') - outputGroup.add_argument('--skip-nans', action='store_true', - help='Add this argument to skip returning features that have an\n' - 'invalid result (NaN)') - outputGroup.add_argument('--format', '-f', choices=['csv', 'json', 'txt'], default='txt', - help='Format for the output.\n' - '"txt" (Default): one feature per line in format "case-N_name:value"\n' - '"json": Features are written in a JSON format dictionary\n' - '(1 dictionary per case, 1 case per line) "{name:value}"\n' - '"csv": one row of feature names, followed by one row of\n' - 'feature values per case.') - outputGroup.add_argument('--format-path', choices=['absolute', 'relative', 
'basename'], default='absolute', - help='Controls input image and mask path formatting in the output.\n' - '"absolute" (Default): Absolute file paths.\n' - '"relative": File paths relative to current working directory.\n' - '"basename": Only stores filename.') - - loggingGroup = parser.add_argument_group(title='Logging', - description='Controls the (amount of) logging output to the ' - 'console and the (optional) log-file.') - loggingGroup.add_argument('--logging-level', metavar='LEVEL', - choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], - default='WARNING', help='Set capture level for logging') - loggingGroup.add_argument('--log-file', metavar='FILE', default=None, help='File to append logger output to') - loggingGroup.add_argument('--verbosity', '-v', action='store', nargs='?', default=3, const=4, type=int, - choices=[1, 2, 3, 4, 5], - help='Regulate output to stderr. By default [3], level\n' - 'WARNING and up are printed. By specifying this\n' - 'argument without a value, level INFO [4] is assumed.\n' - 'A higher value results in more verbose output.') - parser.add_argument('--label', '-l', metavar='N', default=None, type=int, - help='(DEPRECATED) Value of label in mask to use for\n' - 'feature extraction.') - - parser.add_argument('--version', action='version', help='Print version and exit', - version='%(prog)s ' + radiomics.__version__) - - args = parser.parse_args(args=custom_arguments) # Exits with code 2 if parsing fails - - # variable to hold the listener needed for processing parallel log records - queue_listener = None - - # Run the extraction - try: - logging_config, queue_listener = _configureLogging(args) - scriptlogger.info('Starting PyRadiomics (version: %s)', radiomics.__version__) - input_tuple = _processInput(args) - if input_tuple is not None: - if args.validate: - _validateCases(*input_tuple) - else: - results = _extractSegment(*input_tuple, logging_config=logging_config) - segment.processOutput(results, args.out, 
args.skip_nans, args.format, args.format_path, relative_path_start) - scriptlogger.info('Finished extraction successfully...') - else: - return 1 # Feature extraction error - except (KeyboardInterrupt, SystemExit): - scriptlogger.info('Cancelling Extraction') - return -1 - except Exception: - scriptlogger.error('Error extracting features!', exc_info=True) - return 3 # Unknown error - finally: - if queue_listener is not None: - queue_listener.stop() - return 0 # success - - -def _processInput(args): - global relative_path_start, scriptlogger - scriptlogger.info('Processing input...') - - caseCount = 1 - num_workers = 1 - - # Check if input represents a batch file - if args.input.endswith('.csv'): - scriptlogger.debug('Loading batch file "%s"', args.input) - relative_path_start = os.path.dirname(args.input) - with open(args.input, mode='r') as batchFile: - cr = csv.DictReader(batchFile, lineterminator='\n') - - # Check if required Image and Mask columns are present - if 'Image' not in cr.fieldnames: - scriptlogger.error('Required column "Image" not present in input, unable to extract features...') - return None - if 'Mask' not in cr.fieldnames: - scriptlogger.error('Required column "Mask" not present in input, unable to extract features...') - return None - - cases = [] - for row_idx, row in enumerate(cr, start=2): - if row['Image'] is None or row['Mask'] is None: - scriptlogger.warning('Batch L%d: Missing required Image or Mask, skipping this case...', row_idx) - continue - imPath = row['Image'] - maPath = row['Mask'] - if not os.path.isabs(imPath): - imPath = os.path.abspath(os.path.join(relative_path_start, imPath)) - scriptlogger.debug('Updated relative image filepath to be relative to input CSV: %s', imPath) - if not os.path.isabs(maPath): - maPath = os.path.abspath(os.path.join(relative_path_start, maPath)) - scriptlogger.debug('Updated relative mask filepath to be relative to input CSV: %s', maPath) - cases.append(row) - cases[-1]['Image'] = imPath - 
cases[-1]['Mask'] = maPath - - caseCount = len(cases) - caseGenerator = _buildGenerator(args, cases) - num_workers = min(caseCount, args.jobs) - elif args.mask is not None: - caseGenerator = _buildGenerator(args, [{'Image': args.input, 'Mask': args.mask}]) - else: - scriptlogger.error('Input is not recognized as batch, no mask specified, cannot compute result!') - return None - - return caseGenerator, caseCount, num_workers - - -def _extractSegment(case_generator, case_count, num_workers, logging_config): - if num_workers > 1: # multiple cases, parallel processing enabled - scriptlogger.info('Input valid, starting parallel extraction from %d cases with %d workers...', - case_count, num_workers) - pool = Pool(num_workers) - try: - task = pool.map_async(partial(segment.extractSegment_parallel, logging_config=logging_config), - case_generator, - chunksize=min(10, case_count)) - # Wait for the results to be done. task.get() without timeout performs a blocking call, which prevents - # the program from processing the KeyboardInterrupt if it occurs - while not task.ready(): - pass - results = task.get() - pool.close() - except (KeyboardInterrupt, SystemExit): - pool.terminate() - raise - finally: - pool.join() - elif num_workers == 1: # single case or sequential batch processing - scriptlogger.info('Input valid, starting sequential extraction from %d case(s)...', - case_count) - results = [] - for case in case_generator: - results.append(segment.extractSegment(*case)) - else: - # No cases defined in the batch - scriptlogger.error('No cases to process...') - results = None - return results - - -def _validateCases(case_generator, case_count, num_workers): - global scriptlogger - scriptlogger.info('Validating input for %i cases', case_count) - errored_cases = 0 - for case_idx, case, param, setting_overrides in case_generator: - if case_idx == 1 and param is not None: - if not os.path.isfile(param): - scriptlogger.error('Path for specified parameter file does not exist!') - 
else: - schemaFile, schemaFuncs = radiomics.getParameterValidationFiles() - - c = pykwalify.core.Core(source_file=param, schema_files=[schemaFile], extensions=[schemaFuncs]) - try: - c.validate() - except (KeyboardInterrupt, SystemExit): - raise - except Exception as e: - scriptlogger.error('Parameter validation failed!\n%s' % e.message) - scriptlogger.debug("Validating case (%i/%i): %s", case_idx, case_count, case) - - case_error = False - if not os.path.isfile(case['Image']): - case_error = True - scriptlogger.error('Image path for case (%i/%i) does not exist!', case_idx, case_count) - if not os.path.isfile(case['Mask']): - case_error = True - scriptlogger.error('Mask path for case (%i/%i) does not exist!', case_idx, case_count) - - if case_error: - errored_cases += 1 +from . import segment, voxel - scriptlogger.info('Validation complete, errors found in %i case(s)', errored_cases) +class PyRadiomicsCommandLine: -def _buildGenerator(args, cases): - global scriptlogger - setting_overrides = _parseOverrides(args.setting) + def __init__(self, custom_arguments=None): + self.logger = logging.getLogger('radiomics.script') # holds logger for script events + self.relative_path_start = os.getcwd() + self.args = self.getParser().parse_args(args=custom_arguments) # Exits with code 2 if parsing fails - # Section for deprecated argument label - if args.label is not None: - scriptlogger.warning('Argument "label" is deprecated. 
To specify a custom label, use argument "setting" as follows:' - '"--setting=label:N", where N is the a label value.') - setting_overrides['label'] = args.label - # End deprecated section + self.logging_config, self.queue_listener = self._configureLogging() - for case_idx, case in enumerate(cases, start=1): - yield case_idx, case, args.param, setting_overrides - - -def _parseOverrides(overrides): - global scriptlogger - setting_overrides = {} - - # parse overrides - if len(overrides) == 0: - scriptlogger.debug('No overrides found') - return setting_overrides - - scriptlogger.debug('Reading parameter schema') - schemaFile, schemaFuncs = radiomics.getParameterValidationFiles() - with open(schemaFile) as schema: - settingsSchema = yaml.load(schema)['mapping']['setting']['mapping'] - - # parse single value function - def parse_value(value, value_type): - if value_type == 'str': - return value # no conversion - elif value_type == 'int': - return int(value) - elif value_type == 'float': - return float(value) - elif value_type == 'bool': - return value == '1' or value.lower() == 'true' + if self.args.mode == 'segment': + self.serial_func = segment.extractSegment + self.parallel_func = segment.extractSegment_parallel else: - raise ValueError('Cannot understand value_type "%s"' % value_type) - - for setting in overrides: # setting = "setting_key:setting_value" - if ':' not in setting: - scriptlogger.warning('Incorrect format for override setting "%s", missing ":"', setting) - continue - # split into key and value - setting_key, setting_value = setting.split(':', 2) - - # Check if it is a valid PyRadiomics Setting - if setting_key not in settingsSchema: - scriptlogger.warning('Did not recognize override "%s", skipping...', setting_key) - continue - - # Try to parse the value by looking up its type in the settingsSchema + self.serial_func = voxel.extractVoxel + self.parallel_func = voxel.extractVoxel_parallel + + self.case_count = 0 + self.num_workers = 0 + + @classmethod + 
def getParser(cls): + parser = argparse.ArgumentParser(usage='%(prog)s image|batch [mask] [Options]', + formatter_class=argparse.RawTextHelpFormatter) + + inputGroup = parser.add_argument_group(title='Input', + description='Input files and arguments defining the extraction:\n' + '- image and mask files (single mode) ' + 'or CSV-file specifying them (batch mode)\n' + '- Parameter file (.yml/.yaml or .json)\n' + '- Overrides for customization type 3 ("settings")\n' + '- Multi-threaded batch processing') + inputGroup.add_argument('input', metavar='{Image,Batch}FILE', + help='Image file (single mode) or CSV batch file (batch mode)') + inputGroup.add_argument('mask', nargs='?', metavar='MaskFILE', default=None, + help='Mask file identifying the ROI in the Image. \n' + 'Only required when in single mode, ignored otherwise.') + inputGroup.add_argument('--param', '-p', metavar='FILE', default=None, + help='Parameter file containing the settings to be used in extraction') + inputGroup.add_argument('--setting', '-s', metavar='"SETTING_NAME:VALUE"', action='append', default=[], type=str, + help='Additional parameters which will override those in the\n' + 'parameter file and/or the default settings. Multiple\n' + 'settings possible. N.B. Only works for customization\n' + 'type 3 ("setting").') + inputGroup.add_argument('--jobs', '-j', metavar='N', type=int, default=1, + choices=six.moves.range(1, cpu_count() + 1), + help='(Batch mode only) Specifies the number of threads to use for\n' + 'parallel processing. This is applied at the case level;\n' + 'i.e. 1 thread per case. 
Actual number of workers used is\n' + 'min(cases, jobs).') + inputGroup.add_argument('--validate', action='store_true', + help='If specified, check if input is valid and check if file locations point to exisiting ' + 'files') + + outputGroup = parser.add_argument_group(title='Output', description='Arguments controlling output redirection and ' + 'the formatting of calculated results.') + outputGroup.add_argument('--out', '-o', metavar='FILE', type=argparse.FileType('a'), default=sys.stdout, + help='File to append output to.') + outputGroup.add_argument('--out-dir', '-od', type=str, default=None, + help='Directory to store output. If specified in segment mode, this writes csv output for ' + 'each processed case. In voxel mode, this directory is used to store the featuremaps.' + ' If not specified in voxel mode, the current working directory is used instead.') + outputGroup.add_argument('--mode', '-m', choices=['segment', 'voxel'], default='segment', + help='Extraction mode for PyRadiomics.') + outputGroup.add_argument('--skip-nans', action='store_true', + help='Add this argument to skip returning features that have an\n' + 'invalid result (NaN)') + outputGroup.add_argument('--format', '-f', choices=['csv', 'json', 'txt'], default='txt', + help='Format for the output.\n' + '"txt" (Default): one feature per line in format "case-N_name:value"\n' + '"json": Features are written in a JSON format dictionary\n' + '(1 dictionary per case, 1 case per line) "{name:value}"\n' + '"csv": one row of feature names, followed by one row of\n' + 'feature values per case.') + outputGroup.add_argument('--format-path', choices=['absolute', 'relative', 'basename'], default='absolute', + help='Controls input image and mask path formatting in the output.\n' + '"absolute" (Default): Absolute file paths.\n' + '"relative": File paths relative to current working directory.\n' + '"basename": Only stores filename.') + + loggingGroup = parser.add_argument_group(title='Logging', + 
description='Controls the (amount of) logging output to the ' + 'console and the (optional) log-file.') + loggingGroup.add_argument('--logging-level', metavar='LEVEL', + choices=['NOTSET', 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], + default='WARNING', help='Set capture level for logging') + loggingGroup.add_argument('--log-file', metavar='FILE', default=None, help='File to append logger output to') + loggingGroup.add_argument('--verbosity', '-v', action='store', nargs='?', default=3, const=4, type=int, + choices=[1, 2, 3, 4, 5], + help='Regulate output to stderr. By default [3], level\n' + 'WARNING and up are printed. By specifying this\n' + 'argument without a value, level INFO [4] is assumed.\n' + 'A higher value results in more verbose output.') + parser.add_argument('--label', '-l', metavar='N', default=None, type=int, + help='(DEPRECATED) Value of label in mask to use for\n' + 'feature extraction.') + + parser.add_argument('--version', action='version', help='Print version and exit', + version='%(prog)s ' + radiomics.__version__) + return parser + + def run(self): + # Run the extraction try: - setting_def = settingsSchema[setting_key] - setting_type = 'str' # If type is omitted in the schema, treat it as string (no conversion) - if 'seq' in setting_def: - # Multivalued setting - if len(setting_def['seq']) > 0 and 'type' in setting_def['seq'][0]: - setting_type = setting_def['seq'][0]['type'] - - setting_overrides[setting_key] = [parse_value(val, setting_type) for val in setting_value.split(',')] - scriptlogger.debug('Parsed "%s" as list (element type "%s"); value: %s', - setting_key, setting_type, setting_overrides[setting_key]) + self.logger.info('Starting PyRadiomics (version: %s)', radiomics.__version__) + caseGenerator = self._processInput() + if caseGenerator is not None: + if self.args.validate: + self._validateCases(caseGenerator) + else: + results = self._processCases(caseGenerator) + self._processOutput(results) + self.logger.info('Finished 
%s-based extraction successfully...', self.args.mode) else: - if 'type' in setting_def: - setting_type = setting_def['type'] - setting_overrides[setting_key] = parse_value(setting_value, setting_type) - scriptlogger.debug('Parsed "%s" as type "%s"; value: %s', setting_key, setting_type, setting_overrides[setting_key]) - + return 1 # Feature extraction error except (KeyboardInterrupt, SystemExit): - raise + self.logger.info('Cancelling Extraction') + return -1 except Exception: - scriptlogger.warning('Could not parse value "%s" for setting "%s", skipping...', setting_value, setting_key) - - return setting_overrides - - -def _configureLogging(args): - global scriptlogger - - # Listener to process log messages from child processes in case of multiprocessing - queue_listener = None + self.logger.error('Error extracting features!', exc_info=True) + return 3 # Unknown error + finally: + if self.queue_listener is not None: + self.queue_listener.stop() + return 0 # success + + def _processInput(self): + self.logger.info('Processing input...') + + self.case_count = 1 + self.num_workers = 1 + + # Check if input represents a batch file + if self.args.input.endswith('.csv'): + self.logger.debug('Loading batch file "%s"', self.args.input) + self.relative_path_start = os.path.dirname(self.args.input) + with open(self.args.input, mode='r') as batchFile: + cr = csv.DictReader(batchFile, lineterminator='\n') + + # Check if required Image and Mask columns are present + if 'Image' not in cr.fieldnames: + self.logger.error('Required column "Image" not present in input, unable to extract features...') + return None + if 'Mask' not in cr.fieldnames: + self.logger.error('Required column "Mask" not present in input, unable to extract features...') + return None + + cases = [] + for row_idx, row in enumerate(cr, start=2): + if row['Image'] is None or row['Mask'] is None: + self.logger.warning('Batch L%d: Missing required Image or Mask, skipping this case...', row_idx) + continue + imPath = 
row['Image'] + maPath = row['Mask'] + if not os.path.isabs(imPath): + imPath = os.path.abspath(os.path.join(self.relative_path_start, imPath)) + self.logger.debug('Considered image filepath to be relative to input CSV. Absolute path: %s', imPath) + if not os.path.isabs(maPath): + maPath = os.path.abspath(os.path.join(self.relative_path_start, maPath)) + self.logger.debug('Considered mask filepath to be relative to input CSV. Absolute path: %s', maPath) + cases.append(row) + cases[-1]['Image'] = imPath + cases[-1]['Mask'] = maPath + + self.case_count = len(cases) + caseGenerator = self._buildGenerator(cases) + self.num_workers = min(self.case_count, self.args.jobs) + elif self.args.mask is not None: + caseGenerator = self._buildGenerator([{'Image': self.args.input, 'Mask': self.args.mask}]) + else: + self.logger.error('Input is not recognized as batch, no mask specified, cannot compute result!') + return None + + return caseGenerator + + def _validateCases(self, case_generator): + self.logger.info('Validating input for %i cases', self.case_count) + errored_cases = 0 + for case_idx, case, param, setting_overrides in case_generator: + if case_idx == 1 and param is not None: + if not os.path.isfile(param): + self.logger.error('Path for specified parameter file does not exist!') + else: + schemaFile, schemaFuncs = radiomics.getParameterValidationFiles() + + c = pykwalify.core.Core(source_file=param, schema_files=[schemaFile], extensions=[schemaFuncs]) + try: + c.validate() + except (KeyboardInterrupt, SystemExit): + raise + except Exception as e: + self.logger.error('Parameter validation failed!\n%s' % e.message) + self.logger.debug("Validating case (%i/%i): %s", case_idx, self.case_count, case) + + case_error = False + if not os.path.isfile(case['Image']): + case_error = True + self.logger.error('Image path for case (%i/%i) does not exist!', case_idx, self.case_count) + if not os.path.isfile(case['Mask']): + case_error = True + self.logger.error('Mask path for case 
(%i/%i) does not exist!', case_idx, self.case_count) + + if case_error: + errored_cases += 1 + + self.logger.info('Validation complete, errors found in %i case(s)', errored_cases) + + def _processCases(self, case_generator): + if self.num_workers > 1: # multiple cases, parallel processing enabled + self.logger.info('Input valid, starting parallel extraction from %d cases with %d workers...', + self.case_count, self.num_workers) + pool = Pool(self.num_workers) + try: + task = pool.map_async(partial(self.parallel_func, + out_dir=self.args.out_dir, + logging_config=self.logging_config), + case_generator, + chunksize=min(10, self.case_count)) + # Wait for the results to be done. task.get() without timeout performs a blocking call, which prevents + # the program from processing the KeyboardInterrupt if it occurs + while not task.ready(): + pass + results = task.get() + pool.close() + except (KeyboardInterrupt, SystemExit): + pool.terminate() + raise + finally: + pool.join() + elif self.num_workers == 1: # single case or sequential batch processing + self.logger.info('Input valid, starting sequential extraction from %d case(s)...', + self.case_count) + results = [] + for case in case_generator: + results.append(self.serial_func(*case, out_dir=self.args.out_dir)) + else: + # No cases defined in the batch + self.logger.error('No cases to process...') + results = None + return results + + def _processOutput(self, results): + self.logger.info('Processing results...') + + # Store the header of all calculated features + headers = results[0].keys() + + # Set the formatting rule for image and mask paths + if self.args.format_path == 'absolute': + pathFormatter = os.path.abspath + elif self.args.format_path == 'relative': + pathFormatter = partial(os.path.relpath, start=self.relative_path_start) + elif self.args.format_path == 'basename': + pathFormatter = os.path.basename + else: + self.logger.warning('Unrecognized format for paths (%s), reverting to default ("absolute")', 
self.args.format_path) + pathFormatter = os.path.abspath + + for case_idx, case in enumerate(results, start=1): + # if specified, skip NaN values + if self.args.skip_nans: + for key in list(case.keys()): + if isinstance(case[key], float) and numpy.isnan(case[key]): + self.logger.debug('Case %d, feature %s computed NaN, removing from results', case_idx, key) + del case[key] + + # Format paths of image and mask files + case['Image'] = pathFormatter(case['Image']) + case['Mask'] = pathFormatter(case['Mask']) + + # Write out results + if self.args.format == 'csv': + writer = csv.DictWriter(self.args.out, headers, lineterminator='\n') + if case_idx == 1: + writer.writeheader() + writer.writerow(case) # if skip_nans is enabled, nan-values are written as empty strings + elif self.args.format == 'json': + json.dump(case, self.args.out) + self.args.out.write('\n') + else: # txt + for k, v in six.iteritems(case): + self.args.out.write('Case-%d_%s: %s\n' % (case_idx, k, v)) + + def _buildGenerator(self, cases): + setting_overrides = self._parseOverrides() + + # Section for deprecated argument label + if self.args.label is not None: + self.logger.warning( + 'Argument "label" is deprecated. 
To specify a custom label, use argument "setting" as follows:' + '"--setting=label:N", where N is the a label value.') + setting_overrides['label'] = self.args.label + # End deprecated section + + for case_idx, case in enumerate(cases, start=1): + yield case_idx, case, self.args.param, setting_overrides + + def _parseOverrides(self): + setting_overrides = {} + + # parse overrides + if len(self.args.setting) == 0: + self.logger.debug('No overrides found') + return setting_overrides + + self.logger.debug('Reading parameter schema') + schemaFile, schemaFuncs = radiomics.getParameterValidationFiles() + with open(schemaFile) as schema: + settingsSchema = yaml.load(schema)['mapping']['setting']['mapping'] + + # parse single value function + def parse_value(value, value_type): + if value_type == 'str': + return value # no conversion + elif value_type == 'int': + return int(value) + elif value_type == 'float': + return float(value) + elif value_type == 'bool': + return value == '1' or value.lower() == 'true' + else: + raise ValueError('Cannot understand value_type "%s"' % value_type) + + for setting in self.args.setting: # setting = "setting_key:setting_value" + if ':' not in setting: + self.logger.warning('Incorrect format for override setting "%s", missing ":"', setting) + continue + # split into key and value + setting_key, setting_value = setting.split(':', 2) + + # Check if it is a valid PyRadiomics Setting + if setting_key not in settingsSchema: + self.logger.warning('Did not recognize override "%s", skipping...', setting_key) + continue + + # Try to parse the value by looking up its type in the settingsSchema + try: + setting_def = settingsSchema[setting_key] + setting_type = 'str' # If type is omitted in the schema, treat it as string (no conversion) + if 'seq' in setting_def: + # Multivalued setting + if len(setting_def['seq']) > 0 and 'type' in setting_def['seq'][0]: + setting_type = setting_def['seq'][0]['type'] + + setting_overrides[setting_key] = 
[parse_value(val, setting_type) for val in setting_value.split(',')] + self.logger.debug('Parsed "%s" as list (element type "%s"); value: %s', + setting_key, setting_type, setting_overrides[setting_key]) + else: + if 'type' in setting_def: + setting_type = setting_def['type'] + setting_overrides[setting_key] = parse_value(setting_value, setting_type) + self.logger.debug('Parsed "%s" as type "%s"; value: %s', setting_key, setting_type, + setting_overrides[setting_key]) + + except (KeyboardInterrupt, SystemExit): + raise + except Exception: + self.logger.warning('Could not parse value "%s" for setting "%s", skipping...', setting_value, setting_key) - logfileLevel = getattr(logging, args.logging_level) - verboseLevel = (6 - args.verbosity) * 10 # convert to python logging level - logger_level = min(logfileLevel, verboseLevel) + return setting_overrides - logging_config = { - 'version': 1, - 'disable_existing_loggers': False, - 'formatters': { - 'default': { - 'format': '[%(asctime)s] %(levelname)-.1s: %(name)s: %(message)s', - 'datefmt': '%Y-%m-%d %H:%M:%S' - } - }, - 'handlers': { - 'console': { - 'class': 'logging.StreamHandler', - 'level': verboseLevel, - 'formatter': 'default' - } - }, - 'loggers': { - 'radiomics': { - 'level': logger_level, - 'handlers': ['console'] + def _configureLogging(self): + # Listener to process log messages from child processes in case of multiprocessing + queue_listener = None + + logfileLevel = getattr(logging, self.args.logging_level) + verboseLevel = (6 - self.args.verbosity) * 10 # convert to python logging level + logger_level = min(logfileLevel, verboseLevel) + + logging_config = { + 'version': 1, + 'disable_existing_loggers': False, + 'formatters': { + 'default': { + 'format': '[%(asctime)s] %(levelname)-.1s: %(name)s: %(message)s', + 'datefmt': '%Y-%m-%d %H:%M:%S' + } + }, + 'handlers': { + 'console': { + 'class': 'logging.StreamHandler', + 'level': verboseLevel, + 'formatter': 'default' + } + }, + 'loggers': { + 'radiomics': { 
+ 'level': logger_level, + 'handlers': ['console'] + } } } - } - - if args.jobs > 1: - # Update the logger format to include the threadname if multiprocessing - # is enabled - logging_config['formatters']['default']['format'] = \ - '[%(asctime)s] %(levelname)-.1s: (%(threadName)s) %(name)s: %(message)s' - - # Set up optional logging to file - if args.log_file is not None: - py_version = (sys.version_info.major, sys.version_info.minor) - if args.jobs > 1 and py_version >= (3, 2): - # Multiprocessing! Use a QueueHandler, FileHandler and QueueListener - # to implement thread-safe logging. - - # However, QueueHandler and Listener were added in python 3.2. - # Therefore, only use this if the python version > 3.2 - q = Manager().Queue(-1) - threading.current_thread().setName('Main') - - logging_config['handlers']['logfile'] = { - 'class': 'logging.handlers.QueueHandler', - 'queue': q, - 'level': logfileLevel, - 'formatter': 'default' - } - file_handler = logging.FileHandler(filename=args.log_file, mode='a') - file_handler.setFormatter(logging.Formatter(fmt=logging_config['formatters']['default'].get('format'), - datefmt=logging_config['formatters']['default'].get('datefmt'))) + if self.args.jobs > 1: + # Update the logger format to include the threadname if multiprocessing + # is enabled + logging_config['formatters']['default']['format'] = \ + '[%(asctime)s] %(levelname)-.1s: (%(threadName)s) %(name)s: %(message)s' + + # Set up optional logging to file + if self.args.log_file is not None: + py_version = (sys.version_info.major, sys.version_info.minor) + if self.args.jobs > 1 and py_version >= (3, 2): + # Multiprocessing! Use a QueueHandler, FileHandler and QueueListener + # to implement thread-safe logging. + + # However, QueueHandler and Listener were added in python 3.2. 
+        # Therefore, only use this if the python version > 3.2
+        q = Manager().Queue(-1)
+        threading.current_thread().setName('Main')
+
+        logging_config['handlers']['logfile'] = {
+          'class': 'logging.handlers.QueueHandler',
+          'queue': q,
+          'level': logfileLevel,
+          'formatter': 'default'
+        }
+
+        file_handler = logging.FileHandler(filename=self.args.log_file, mode='a')
+        file_handler.setFormatter(logging.Formatter(fmt=logging_config['formatters']['default'].get('format'),
+                                                    datefmt=logging_config['formatters']['default'].get('datefmt')))
+
+        queue_listener = logging.handlers.QueueListener(q, file_handler)
+        queue_listener.start()
+      else:
+        logging_config['handlers']['logfile'] = {
+          'class': 'logging.FileHandler',
+          'filename': self.args.log_file,
+          'mode': 'a',
+          'level': logfileLevel,
+          'formatter': 'default'
+        }
+        logging_config['loggers']['radiomics']['handlers'].append('logfile')
 
-      queue_listener = logging.handlers.QueueListener(q, file_handler)
-      queue_listener.start()
-    else:
-      logging_config['handlers']['logfile'] = {
-        'class': 'logging.FileHandler',
-        'filename': args.log_file,
-        'mode': 'a',
-        'level': logfileLevel,
-        'formatter': 'default'
-      }
-      logging_config['loggers']['radiomics']['handlers'].append('logfile')
+    logging.config.dictConfig(logging_config)
 
-  logging.config.dictConfig(logging_config)
+    self.logger.debug('Logging initialized')
+    return logging_config, queue_listener
 
-  scriptlogger.debug('Logging initialized')
-  return logging_config, queue_listener
+
+def parse_args():
+  try:
+    return PyRadiomicsCommandLine().run()
+  except Exception as e:
+    logging.getLogger().error("Error executing PyRadiomics command line!", exc_info=True)
+    print("Error executing PyRadiomics command line!\n%s" % e)
+    return 4
 
diff --git a/radiomics/scripts/segment.py b/radiomics/scripts/segment.py
index a1a00057..db55adac 100644
--- a/radiomics/scripts/segment.py
+++ b/radiomics/scripts/segment.py
@@ -17,7 +17,38 @@
 _parallel_extraction_configured = False
 
 
-def 
extractSegment(case_idx, case, config, config_override):
+def extractSegment(case_idx, case, config, config_override, out_dir):
+  global caseLogger
+
+  if out_dir is None:
+    return _extractFeatures(case_idx, case, config, config_override)
+
+  filename = os.path.join(out_dir, 'features_%s.csv' % case_idx)
+  if os.path.isfile(filename):
+    # Output already generated, load result (prevents re-extraction in case of interrupted process)
+    with open(filename, 'r') as outputFile:
+      reader = csv.reader(outputFile)
+      headers = next(reader)
+      values = next(reader)
+      feature_vector = OrderedDict(zip(headers, values))
+
+    caseLogger.info('Patient %s already processed, reading results...', case_idx)
+  else:
+    # Extract the set of features. Set parallel_config flag to None, as any logging initialization is already handled.
+    feature_vector = _extractFeatures(case_idx, case, config, config_override)
+
+    # Store results in temporary separate files to prevent write conflicts
+    # This allows for the extraction to be interrupted. Upon restarting, already processed cases are found in the
+    # TEMP_DIR directory and loaded instead of re-extracted
+    with open(filename, 'w') as outputFile:
+      writer = csv.DictWriter(outputFile, fieldnames=list(feature_vector.keys()), lineterminator='\n')
+      writer.writeheader()
+      writer.writerow(feature_vector)
+
+  return feature_vector
+
+
+def _extractFeatures(case_idx, case, config, config_override):
   global caseLogger
 
   # Instantiate the output
@@ -54,113 +85,19 @@ def extractSegment(case_idx, case, config, config_override):
   return feature_vector
 
 
-def extractSegment_parallel(args, logging_config=None):
-  try:
-    if logging_config is not None:
-      # set thread name to patient name
-      threading.current_thread().name = 'case %s' % args[0]  # args[0] = case_idx
-      _configureParallelExtraction(logging_config)
-    return extractSegment(*args)
-  except (KeyboardInterrupt, SystemExit):
-    # Catch the error here, as this represents the interrupt of the child process.
- # The main process is also interrupted, and cancellation is further handled there - return None - - -def extractSegmentWithTempFiles(case_idx, case, config, config_override, temp_dir): - global caseLogger - - filename = os.path.join(temp_dir, 'features_%s.csv' % case_idx) - if os.path.isfile(filename): - # Output already generated, load result (prevents re-extraction in case of interrupted process) - with open(filename, 'w') as outputFile: - reader = csv.reader(outputFile) - headers = reader.rows[0] - values = reader.rows[1] - feature_vector = OrderedDict(zip(headers, values)) - - caseLogger.info('Patient %s already processed, reading results...', case_idx) - else: - # Extract the set of features. Set parallel_config flag to None, as any logging initialization is already handled. - feature_vector = extractSegment(case_idx, case, config, config_override) - - # Store results in temporary separate files to prevent write conflicts - # This allows for the extraction to be interrupted. Upon restarting, already processed cases are found in the - # TEMP_DIR directory and loaded instead of re-extracted - with open(filename, 'w') as outputFile: - writer = csv.DictWriter(outputFile, fieldnames=list(feature_vector.keys()), lineterminator='\n') - writer.writeheader() - writer.writerow(feature_vector) - - return feature_vector - - -def extractSegmentWithTempFiles_parallel(args, logging_config=None): +def extractSegment_parallel(args, out_dir=None, logging_config=None): try: if logging_config is not None: # set thread name to patient name threading.current_thread().name = 'case %s' % args[0] # args[0] = case_idx _configureParallelExtraction(logging_config) - return extractSegmentWithTempFiles(*args) + return extractSegment(*args, out_dir=out_dir) except (KeyboardInterrupt, SystemExit): # Catch the error here, as this represents the interrupt of the child process. 
# The main process is also interrupted, and cancellation is further handled there return None -def processOutput(results, - outStream, - skip_nans=False, - format_output='csv', - format_path='absolute', - relative_path_start=''): - global caseLogger - caseLogger.info('Processing results...') - - # Store the header of all calculated features - headers = results[0].keys() - - # Set the formatting rule for image and mask paths - if format_path == 'absolute': - pathFormatter = os.path.abspath - elif format_path == 'relative': - pathFormatter = partial(os.path.relpath, start=relative_path_start) - elif format_path == 'basename': - pathFormatter = os.path.basename - else: - caseLogger.warning('Unrecognized format for paths (%s), reverting to default ("absolute")', format_path) - pathFormatter = os.path.abspath - - for case_idx, case in enumerate(results, start=1): - # if specified, skip NaN values - if skip_nans: - for key in list(case.keys()): - if isinstance(case[key], float) and numpy.isnan(case[key]): - caseLogger.debug('Case %d, feature %s computed NaN, removing from results', case_idx, key) - del case[key] - - # Format paths of image and mask files - case['Image'] = pathFormatter(case['Image']) - case['Mask'] = pathFormatter(case['Mask']) - - # Write out results - if format_output not in ('csv', 'json', 'txt'): - caseLogger.warning('Unrecognized format for output (%s), reverting to default ("csv")', format_output) - format_output = 'csv' - - if format_output == 'csv': - writer = csv.DictWriter(outStream, headers, lineterminator='\n') - if case_idx == 1: - writer.writeheader() - writer.writerow(case) # if skip_nans is enabled, nan-values are written as empty strings - elif format_output == 'json': - json.dump(case, outStream) - outStream.write('\n') - else: # txt - for k, v in six.iteritems(case): - outStream.write('Case-%d_%s: %s\n' % (case_idx, k, v)) - - def _configureParallelExtraction(logging_config, add_info_filter=True): """ Initialize logging for parallel 
extraction. This needs to be done here, as it needs to be done for each thread that is diff --git a/radiomics/scripts/voxel.py b/radiomics/scripts/voxel.py new file mode 100644 index 00000000..4c9719fe --- /dev/null +++ b/radiomics/scripts/voxel.py @@ -0,0 +1,125 @@ +from collections import OrderedDict +import csv +from datetime import datetime +from functools import partial +import json +import logging.config +import os +import threading + +import numpy +import SimpleITK as sitk +import six + +import radiomics.featureextractor + +caseLogger = logging.getLogger('radiomics.script') +_parallel_extraction_configured = False + + +def extractVoxel(case_idx, case, config, config_override, out_dir): + global caseLogger + + # Instantiate the output + feature_vector = OrderedDict(case) + + try: + if out_dir is None: + out_dir = '.' + elif not os.path.isdir(out_dir): + caseLogger.debug('Creating output directory at %s' % out_dir) + os.makedirs(out_dir) + + caseLogger.info('Processing case %s', case_idx) + t = datetime.now() + + imageFilepath = case['Image'] # Required + maskFilepath = case['Mask'] # Required + label = case.get('Label', None) # Optional + if isinstance(label, six.string_types): + label = int(label) + + # Instantiate Radiomics Feature extractor + extractor = radiomics.featureextractor.RadiomicsFeaturesExtractor(config, **config_override) + + # Extract features + result = extractor.execute(imageFilepath, maskFilepath, label, voxelBased=True) + + for k in result: + if isinstance(result[k], sitk.Image): + target = os.path.join(out_dir, 'Case-%i_%s.nrrd' % (case_idx, k)) + sitk.WriteImage(result[k], target, True) + feature_vector[k] = target + else: + feature_vector[k] = result[k] + + # Display message + delta_t = datetime.now() - t + caseLogger.info('Case %s processed in %s', case_idx, delta_t) + + except (KeyboardInterrupt, SystemExit): # Cancel extraction by forwarding this 'error' + raise + except SystemError: + # Occurs when Keyboard Interrupt is caught while 
the thread is processing a SimpleITK call + raise KeyboardInterrupt() + except Exception: + caseLogger.error('Feature extraction failed!', exc_info=True) + + return feature_vector + + +def extractVoxel_parallel(args, out_dir=None, logging_config=None): + try: + if logging_config is not None: + # set thread name to patient name + threading.current_thread().name = 'case %s' % args[0] # args[0] = case_idx + _configureParallelExtraction(logging_config) + return extractVoxel(*args, out_dir=out_dir) + except (KeyboardInterrupt, SystemExit): + # Catch the error here, as this represents the interrupt of the child process. + # The main process is also interrupted, and cancellation is further handled there + return None + + +def _configureParallelExtraction(logging_config, add_info_filter=True): + """ + Initialize logging for parallel extraction. This needs to be done here, as it needs to be done for each thread that is + created. + """ + global _parallel_extraction_configured + if _parallel_extraction_configured: + return + + # Configure logging + ################### + + logging.config.dictConfig(logging_config) + + if add_info_filter: + # Define filter that allows messages from specified filter and level INFO and up, and level WARNING and up from + # other loggers. + class info_filter(logging.Filter): + def __init__(self, name): + super(info_filter, self).__init__(name) + self.level = logging.WARNING + + def filter(self, record): + if record.levelno >= self.level: + return True + if record.name == self.name and record.levelno >= logging.INFO: + return True + return False + + # Adding the filter to the first handler of the radiomics logger limits the info messages on the output to just + # those from radiomics.script, but warnings and errors from the entire library are also printed to the output. + # This does not affect the amount of logging stored in the log file. 
+ outputhandler = radiomics.logger.handlers[0] # Handler printing to the output + outputhandler.addFilter(info_filter('radiomics.script')) + + # Ensure the entire extraction for each cases is handled on 1 thread + #################################################################### + + sitk.ProcessObject_SetGlobalDefaultNumberOfThreads(1) + + _parallel_extraction_configured = True + radiomics.logger.debug('parallel extraction configured') From 92e09879f54c938e36090189a72cf436cd12b19e Mon Sep 17 00:00:00 2001 From: Joost van Griethuysen Date: Thu, 31 Jan 2019 11:34:01 +0000 Subject: [PATCH 2/4] DOCS: Update the docs to reflect voxel-based functionality --- README.md | 5 +++- docs/index.rst | 4 +++- docs/usage.rst | 65 +++++++++++++++++++++++++------------------------- 3 files changed, 39 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index 7c0e300a..571f9fa1 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,10 @@ With this package we aim to establish a reference standard for Radiomic Analysis open-source platform for easy and reproducible Radiomic Feature extraction. By doing so, we hope to increase awareness of radiomic capabilities and expand the community. -The platform supports both the feature extraction in 2D and 3D. **Not intended for clinical use.** +The platform supports both the feature extraction in 2D and 3D and can be used to calculate single values per feature +for a region of interest ("segment-based") or to generate feature maps ("voxel-based"). + +**Not intended for clinical use.** **If you publish any work which uses this package, please cite the following publication:** *van Griethuysen, J. J. M., Fedorov, A., Parmar, C., Hosny, A., Aucoin, N., Narayan, V., Beets-Tan, R. G. H., diff --git a/docs/index.rst b/docs/index.rst index 5808f626..43c6b451 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -9,7 +9,9 @@ Welcome to pyradiomics documentation! 
This is an open-source python package for the extraction of Radiomics features from medical imaging. With this package we aim to establish a reference standard for Radiomic Analysis, and provide a tested and maintained open-source platform for easy and reproducible Radiomic Feature extraction. By doing so, we hope to increase awareness -of radiomic capabilities and expand the community. The platform supports both the feature extraction in 2D and 3D. +of radiomic capabilities and expand the community. The platform supports both the feature extraction in 2D and 3D and +can be used to calculate single values per feature for a region of interest ("segment-based") or to generate feature +maps ("voxel-based"). **If you publish any work which uses this package, please cite the following publication:** *van Griethuysen, J. J. M., Fedorov, A., Parmar, C., Hosny, A., Aucoin, N., Narayan, V., Beets-Tan, R. G. H., diff --git a/docs/usage.rst b/docs/usage.rst index b693ae77..c2ff2155 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -28,9 +28,20 @@ Example * The parameter file used in the instruction video is available in ``pyradiomics/examples/exampleSettings`` -* If jupyter is not installed, run the python script alternative (``pyradiomics/examples/helloRadiomics.py``): +* If jupyter is not installed, run the python script alternatives contained in the folder (``pyradiomics/examples``): - * ``python helloRadiomics.py`` + * ``python helloRadiomics.py`` (segment-based extraction) + * ``python helloVoxel.py`` (voxel-based extraction) + +---------------------- +Voxel-based extraction +---------------------- + +As of version 2.0, pyradiomics also implements a voxel-based extraction. It is both available from the command line and +in the interactive use. See below for details. 
+ +Important to know here is that this extraction takes longer (features have to be calculated for each voxel), and that +the output is a SimpleITK image of the parameter map instead of a float value *for each feature*. ---------------- Command Line Use @@ -71,6 +82,12 @@ Command Line Use argument and/or by specifying override settings (only `type 3 customization `) in the ``--setting`` argument. Multiple overrides can be used by specifying ``--setting`` multiple times. +* To extract feature maps ("voxel-based" extraction), simply add the argument ``--mode voxel``. The calculated feature + maps are then stored as images (NRRD format) in the current working directory. The name convention used is + "Case-_.nrrd". An alternative output directory can be provided in the ``--out-dir`` command line + switch. The results that are printed to the console window or the out file will still contain the diagnostic + information, and the value of the extracted features is set to the location the feature maps are stored. + * For more information on the possible command line arguments, run:: pyradiomics -h @@ -91,9 +108,12 @@ Interactive Use * Import the necessary classes:: - from radiomics import featureextractor, getTestCase + import os + + import SimpleITK as sitk import six - import sys, os + + from radiomics import featureextractor, getTestCase * Set up a pyradiomics directory variable:: @@ -113,44 +133,23 @@ Interactive Use extractor = featureextractor.RadiomicsFeaturesExtractor(params) -* Calculate the features:: +* Calculate the features (segment-based):: result = extractor.execute(imageName, maskName) for key, val in six.iteritems(result): print("\t%s: %s" %(key, val)) -* See the :ref:`feature extractor class` for more information on using this core class. - ----------------------- -Voxel-based extraction ----------------------- - -As of version 2.0, pyradiomics also implements a voxel-based extraction. 
-Currently, this is only available in the interactive mode, and is as simple as telling the feature extractor to
-extract a parameter map::
-
-  from radiomics import featureextractor, getTestCase
-  import six
-  import sys, os
-
-  import SimpleITK as sitk
-
-  dataDir = '/path/to/pyradiomics'
-
-  imageName, maskName = getTestCase('brain1', dataDir)
-  params = os.path.join(dataDir, "examples", "exampleSettings", "exampleVoxel.yaml")
-
-  extractor = featureextractor.RadiomicsFeaturesExtractor(params)
+* Calculate the features (voxel-based)::
 
   result = extractor.execute(imageName, maskName, voxelBased=True)
-
   for key, val in six.iteritems(result):
-    sitk.WriteImage(val, key + 'nrrd')
+    if isinstance(val, sitk.Image):  # Feature map
+      sitk.WriteImage(val, key + '.nrrd', True)
+      print("Stored feature %s in %s" % (key, key + ".nrrd"))
+    else:  # Diagnostic information
+      print("\t%s: %s" %(key, val))
 
-Important to know here is that this extraction takes longer (features have to be calculated for each voxel), and that
-the output is a SimpleITK image of the parameter map instead of a float value *for each feature*.
-
-Be sure to also check out the ``helloVoxel.py`` example available in the repository (folder ``examples``).
+* See the :ref:`feature extractor class` for more information on using this core class.
 
 ------------------------
 PyRadiomics in 3D Slicer

From 36639a5c35bf6b534cb3735b3304ed8e17b2ab3e Mon Sep 17 00:00:00 2001
From: Joost van Griethuysen
Date: Thu, 31 Jan 2019 11:49:24 +0000
Subject: [PATCH 3/4] DOCS: Add voxel settings to customization docs.
--- docs/customization.rst | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/docs/customization.rst b/docs/customization.rst index 069a82cd..b66a0384 100644 --- a/docs/customization.rst +++ b/docs/customization.rst @@ -8,12 +8,13 @@ Customizing the Extraction Types of Customization ---------------------- -There are 3 ways in which the feature extraction can be customized in PyRadiomics: +There are 4 ways in which the feature extraction can be customized in PyRadiomics: 1. Specifying which image types (original/derived) to use to extract features from 2. Specifying which feature(class) to extract 3. Specifying settings, which control the pre processing and customize the behaviour of enabled filters and feature classes. +4. Specifying the voxel-based specific settings, which are only needed when using PyRadiomics to generate feature maps .. warning:: At initialization of the feature extractor or an individual feature class, settings can be provided as keyword @@ -349,13 +350,34 @@ Feature Class Specific Settings - ``gldm_a`` [0]: float, :math:`\alpha` cutoff value for dependence. A neighbouring voxel with gray level :math:`j` is considered dependent on center voxel with gray level :math:`i` if :math:`|i-j|\le\alpha` +.. _radiomics-voxel-settings-label: + +Voxel-based specific settings +############################# + +When using PyRadiomics to generate feature maps, additional customization options exist. These control the neighborhood +around each voxel that is used for calculation (kernel) and what the background value should be, i.e. the value of +voxels for which there is no calculated value. + +- ``kernelRadius`` [1]: integer, specifies the size of the kernel to use as the radius from the center voxel. Therefore + the actual size is ``2 * kernelRadius + 1``. E.g. a value of 1 yields a 3x3x3 kernel, a value of 2 5x5x5, etc. 
In case + of 2D extraction, the generated kernel will also be a 2D shape (square instead of cube). + +- ``maskedKernel`` [True]: boolean, specifies whether to mask the kernel with the overall mask. If True, only voxels in + the kernel that are also segmented in the mask are used for calculation. Otherwise, all voxels inside the kernel are + used. Moreover, gray value discretization is performed over the ROI if the setting is set to True, and over the entire + image if False. + +- ``initValue`` [0]: float, value to use for voxels outside the ROI, or voxels where calculation failed. If set to + ``nan``, 3D slicer will treat them as transparent voxels + .. _radiomics-parameter-file-label: -------------- Parameter File -------------- -All 3 categories of customization can be provided in a single yaml or JSON structured text file, which can be provided +All 4 categories of customization can be provided in a single yaml or JSON structured text file, which can be provided in an optional argument (``--param``) when running pyradiomics from the command line. In interactive mode, it can be provided during initialization of the :ref:`feature extractor `, or using :py:func:`~radiomics.featureextractor.RadiomicsFeaturesExtractor.loadParams` after initialization. This removes the need @@ -399,6 +421,8 @@ The three setting types are named as follows: is specified or is an empty list ('[]'), all features for this class are enabled. 3. **setting:** Setting to use for pre processing and class specific settings. if no is specified, the value for this setting is set to None. +4. **voxelSetting:** Settings used to control the voxel-based specific settings. E.g. the size of the kernel used and + the background value in the parameter maps. 
Example::

From ffd6ef69158c599ab65cb614345d77a69f2523fd Mon Sep 17 00:00:00 2001
From: Joost van Griethuysen
Date: Thu, 31 Jan 2019 12:15:25 +0000
Subject: [PATCH 4/4] STYL: Fix flake8 errors

---
 radiomics/scripts/segment.py | 3 ---
 radiomics/scripts/voxel.py | 4 ----
 2 files changed, 7 deletions(-)

diff --git a/radiomics/scripts/segment.py b/radiomics/scripts/segment.py
index db55adac..93b4354e 100644
--- a/radiomics/scripts/segment.py
+++ b/radiomics/scripts/segment.py
@@ -1,13 +1,10 @@
 from collections import OrderedDict
 import csv
 from datetime import datetime
-from functools import partial
-import json
 import logging.config
 import os
 import threading
 
-import numpy
 import SimpleITK as sitk
 import six
 
diff --git a/radiomics/scripts/voxel.py b/radiomics/scripts/voxel.py
index 4c9719fe..faea49a9 100644
--- a/radiomics/scripts/voxel.py
+++ b/radiomics/scripts/voxel.py
@@ -1,13 +1,9 @@
 from collections import OrderedDict
-import csv
 from datetime import datetime
-from functools import partial
-import json
 import logging.config
 import os
 import threading
 
-import numpy
 import SimpleITK as sitk
 import six