diff --git a/autosklearn/automl.py b/autosklearn/automl.py index c392065ee7..4f922049de 100644 --- a/autosklearn/automl.py +++ b/autosklearn/automl.py @@ -172,8 +172,8 @@ def __init__(self, memory_limit=3072, metadata_directory=None, debug_mode=False, - include=None, - exclude=None, + include: Optional[Dict[str, List[str]]] = None, + exclude: Optional[Dict[str, List[str]]] = None, resampling_strategy='holdout-iterative-fit', resampling_strategy_arguments=None, n_jobs=None, @@ -1844,10 +1844,14 @@ def show_models(self): return sio.getvalue() - def _create_search_space(self, tmp_dir, backend, datamanager, - include=None, - exclude=None, - ): + def _create_search_space( + self, + tmp_dir, + backend, + datamanager, + include: Optional[Dict[str, List[str]]] = None, + exclude: Optional[Dict[str, List[str]]] = None, + ): task_name = 'CreateConfigSpace' self._stopwatch.start_task(task_name) diff --git a/autosklearn/estimators.py b/autosklearn/estimators.py index f38b58cc44..256c47934c 100644 --- a/autosklearn/estimators.py +++ b/autosklearn/estimators.py @@ -34,8 +34,8 @@ def __init__( max_models_on_disc=50, seed=1, memory_limit=3072, - include=None, - exclude=None, + include: Optional[Dict[str, List[str]]] = None, + exclude: Optional[Dict[str, List[str]]] = None, resampling_strategy='holdout', resampling_strategy_arguments=None, tmp_folder=None, @@ -97,24 +97,63 @@ def __init__( Memory limit in MB for the machine learning algorithm. `auto-sklearn` will stop fitting the machine learning algorithm if it tries to allocate more than ``memory_limit`` MB. - - **Important notes:** - + + **Important notes:** + * If ``None`` is provided, no memory limit is set. - * In case of multi-processing, ``memory_limit`` will be *per job*, so the total usage is + * In case of multi-processing, ``memory_limit`` will be *per job*, so the total usage is ``n_jobs x memory_limit``. * The memory limit also applies to the ensemble creation process. 
- include : dict, optional (None) - If None, all possible algorithms are used. Otherwise specifies - set of algorithms for each added component is used. Include and - exclude are incompatible if used together on the same component + include : Optional[Dict[str, List[str]]] = None + If None, all possible algorithms are used. + + Otherwise, specifies a step and the components that are included in search. + See ``/pipeline/components/<step>/*`` for available components. + + Incompatible with parameter ``exclude``. + + **Possible Steps**: + + * ``"data_preprocessor"`` + * ``"balancing"`` + * ``"feature_preprocessor"`` + * ``"classifier"`` - Only when using ``AutoSklearnClassifier`` + * ``"regressor"`` - Only when using ``AutoSklearnRegressor`` + + **Example**: + + .. code-block:: python + + include = { + 'classifier': ["random_forest"], + 'feature_preprocessor': ["no_preprocessing"] + } + + exclude : Optional[Dict[str, List[str]]] = None + If None, all possible algorithms are used. + + Otherwise, specifies a step and the components that are excluded from search. + See ``/pipeline/components/<step>/*`` for available components. + + Incompatible with parameter ``include``. + + **Possible Steps**: + + * ``"data_preprocessor"`` + * ``"balancing"`` + * ``"feature_preprocessor"`` + * ``"classifier"`` - Only when using ``AutoSklearnClassifier`` + * ``"regressor"`` - Only when using ``AutoSklearnRegressor`` + + **Example**: + + .. code-block:: python - exclude : dict, optional (None) - If None, all possible algorithms are used. Otherwise specifies - set of algorithms for each added component is not used. - Incompatible with include. 
Include and exclude are incompatible - if used together on the same component + exclude = { + 'classifier': ["random_forest"], + 'feature_preprocessor': ["no_preprocessing"] + } resampling_strategy : string or object, optional ('holdout') how to to handle overfitting, might need 'resampling_strategy_arguments' diff --git a/autosklearn/smbo.py b/autosklearn/smbo.py index 696e415a4b..3cb823f2ff 100644 --- a/autosklearn/smbo.py +++ b/autosklearn/smbo.py @@ -1,3 +1,4 @@ +from typing import Dict, List, Optional import copy import json import logging @@ -231,8 +232,8 @@ def __init__(self, config_space, dataset_name, metadata_directory=None, resampling_strategy='holdout', resampling_strategy_args=None, - include=None, - exclude=None, + include: Optional[Dict[str, List[str]]] = None, + exclude: Optional[Dict[str, List[str]]] = None, disable_file_output=False, smac_scenario_args=None, get_smac_object_callback=None,