diff --git a/docs/conf.py b/docs/conf.py index 11000d6..834e6ca 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -40,6 +40,7 @@ "sphinx_design", "sphinxcontrib.bibtex", "sphinxext_altair.altairplot", + "sphinxext.directives", "sphinxext.gallery", ] diff --git a/docs/sphinxext/directives.py b/docs/sphinxext/directives.py new file mode 100644 index 0000000..10a450a --- /dev/null +++ b/docs/sphinxext/directives.py @@ -0,0 +1,107 @@ +from docutils import nodes +from docutils.parsers.rst import Directive +from sphinx.locale import _ + + +def process_text(text): + """Process the text to identify and format literals.""" + parts = [] + start = 0 + while True: + start_literal = text.find("`", start) + if start_literal == -1: + parts.append(nodes.Text(text[start:])) + break + parts.append(nodes.Text(text[start:start_literal])) + end_literal = text.find("`", start_literal + 1) + if end_literal == -1: + break # unmatched backticks + literal_text = text[start_literal + 1 : end_literal] + parts.append(nodes.literal(literal_text, literal_text)) + start = end_literal + 1 + return parts + + +class div(nodes.General, nodes.Element): + @staticmethod + def visit_div(self, node): + self.body.append(self.starttag(node, "div")) + + @staticmethod + def depart_div(self, node=None): + self.body.append("</div>\n") + + +class span(nodes.Inline, nodes.TextElement): + @staticmethod + def visit_span(self, node): + self.body.append(self.starttag(node, "span", "")) + + @staticmethod + def depart_span(self, node=None): + self.body.append("</span>") + + +class SklearnVersionAddedDirective(Directive): + """Custom directive to denote the version additions to scikit-learn.""" + + required_arguments = 1 + optional_arguments = 0 + final_argument_whitespace = True + has_content = True + + def run(self): + text = None + if len(self.arguments[0].split("\n", 1)) > 1: + version, text = self.arguments[0].split("\n", 1) + else: + version = self.arguments[0] + container = div(classes=["versionadded"]) + paragraph = 
nodes.paragraph() + span_node = span( + "", + _(f"New in scikit-learn version {version}{'.' if text is None else ': '} "), + classes=["versionmodified", "added"], + ) + paragraph += span_node + if text is not None: + paragraph += process_text(text) + container += paragraph + self.state.nested_parse(self.content, self.content_offset, container) + return [container] + + +class SklearnVersionChangedDirective(Directive): + """Custom directive to denote the version changes to scikit-learn.""" + + required_arguments = 1 + optional_arguments = 0 + final_argument_whitespace = True + has_content = True + + def run(self): + text = None + if len(self.arguments[0].split("\n")) > 1: + version, text = self.arguments[0].split("\n", 1) + else: + version = self.arguments[0] + container = div(classes=["versionchanged"]) + paragraph = nodes.paragraph() + span_node = span( + "", + _(f"Changed in scikit-learn version {version}{'.' if text is None else ': '} "), + classes=["versionmodified", "changed"], + ) + paragraph += span_node + if text is not None: + paragraph += process_text(text) + container += paragraph + self.state.nested_parse(self.content, self.content_offset, container) + return [container] + + +def setup(app): + app.add_node(div, html=(div.visit_div, div.depart_div)) + app.add_node(span, html=(span.visit_span, span.depart_span)) + app.add_directive("sklearn-versionadded", SklearnVersionAddedDirective) + app.add_directive("sklearn-versionchanged", SklearnVersionChangedDirective) diff --git a/examples/plot_quantile_extrapolation.py b/examples/plot_quantile_extrapolation.py index b7803e7..b0a9a5a 100755 --- a/examples/plot_quantile_extrapolation.py +++ b/examples/plot_quantile_extrapolation.py @@ -319,7 +319,7 @@ def train_test_split(train_indices, **kwargs): def prob_randomized_pi(qmat, y, coverage): - """Calculate calibration probability""" + """Calculate calibration probability.""" alpha_included = np.mean((qmat[:, 0] <= y) & (y <= qmat[:, 1])) alpha_excluded = 
np.mean((qmat[:, 0] < y) & (y < qmat[:, 1])) if coverage <= alpha_excluded: diff --git a/quantile_forest/_quantile_forest.py b/quantile_forest/_quantile_forest.py index 6ff0777..7936ebf 100755 --- a/quantile_forest/_quantile_forest.py +++ b/quantile_forest/_quantile_forest.py @@ -875,11 +875,17 @@ class RandomForestQuantileRegressor(BaseForestQuantileRegressor): n_estimators : int, default=100 The number of trees in the forest. + .. sklearn-versionchanged:: 0.22 + The default value of `n_estimators` changed from 10 to 100 + in 0.22. + default_quantiles : float, list, or "mean", default=0.5 The default quantile or list of quantiles that the model tries to predict. Each quantile must be strictly between 0 and 1. If "mean", the model predicts the mean. + .. versionadded:: 1.2 + criterion : {"squared_error", "absolute_error", "friedman_mse", "poisson"}, \ default="squared_error" The function to measure the quality of a split. Supported criteria @@ -893,6 +899,12 @@ class RandomForestQuantileRegressor(BaseForestQuantileRegressor): Training using "absolute_error" is significantly slower than when using "squared_error". + .. sklearn-versionadded:: 0.18 + Mean Absolute Error (MAE) criterion. + + .. sklearn-versionadded:: 1.0 + Poisson criterion. + max_depth : int, default=None The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than @@ -906,6 +918,9 @@ class RandomForestQuantileRegressor(BaseForestQuantileRegressor): `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. + .. sklearn-versionchanged:: 0.18 + Added float values for fractions. + min_samples_leaf : int or float, default=1 The minimum number of samples required to be at a leaf node. 
A split point at any depth will only be considered if it leaves at @@ -918,6 +933,9 @@ class RandomForestQuantileRegressor(BaseForestQuantileRegressor): `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. + .. sklearn-versionchanged:: 0.18 + Added float values for fractions. + max_samples_leaf : int, float or None, default=1 The maximum number of samples permitted to be at a leaf node. @@ -947,6 +965,10 @@ class RandomForestQuantileRegressor(BaseForestQuantileRegressor): The default of 1.0 is equivalent to bagged trees and more randomness can be achieved by setting smaller values, e.g. 0.3. + .. sklearn-versionchanged:: 1.1 + The default of `max_features` changed from `"auto"` to 1.0. + + Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features. @@ -972,6 +994,8 @@ class RandomForestQuantileRegressor(BaseForestQuantileRegressor): ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. + .. sklearn-versionadded:: 0.19 + bootstrap : bool, default=True Whether bootstrap samples are used when building trees. If False, the whole dataset is used to build each tree. @@ -1007,6 +1031,8 @@ class RandomForestQuantileRegressor(BaseForestQuantileRegressor): subtree with the largest cost complexity that is smaller than ``ccp_alpha`` will be chosen. By default, no pruning is performed. + .. sklearn-versionadded:: 0.22 + max_samples : int or float, default=None If bootstrap is True, the number of samples to draw from X to train each base estimator. @@ -1016,6 +1042,8 @@ class RandomForestQuantileRegressor(BaseForestQuantileRegressor): - If float, then draw `max(round(n_samples * max_samples), 1)` samples. Thus, `max_samples` should be in the interval `(0.0, 1.0]`. + .. 
sklearn-versionadded:: 0.22 + monotonic_cst : array-like of int of shape (n_features), default=None Indicates the monotonicity constraint to enforce on each feature. - 1: monotonically increasing @@ -1028,12 +1056,17 @@ class RandomForestQuantileRegressor(BaseForestQuantileRegressor): - multioutput regressions (i.e. when `n_outputs_ > 1`), - regressions trained on data with missing values. + .. sklearn-versionadded:: 1.4 + Attributes ---------- estimator_ : :class:`~sklearn.tree.DecisionTreeRegressor` The child estimator template used to create the collection of fitted sub-estimators. + .. sklearn-versionadded:: 1.2 + `base_estimator_` was renamed to `estimator_`. + estimators_ : list of DecisionTreeRegressor The collection of fitted sub-estimators. @@ -1050,10 +1083,14 @@ class RandomForestQuantileRegressor(BaseForestQuantileRegressor): n_features_in_ : int Number of features seen during :term:`fit`. + .. sklearn-versionadded:: 0.24 + feature_names_in_ : ndarray of shape (`n_features_in_`,) Names of features seen during :term:`fit`. Defined only when `X` has feature names that are all strings. + .. sklearn-versionadded:: 1.0 + n_outputs_ : int The number of outputs when ``fit`` is performed. @@ -1069,6 +1106,8 @@ class RandomForestQuantileRegressor(BaseForestQuantileRegressor): The subset of drawn samples (i.e., the in-bag samples) for each base estimator. Each subset is defined by an array of the indices selected. + .. 
sklearn-versionadded:: 1.4 + See Also -------- ExtraTreesQuantileRegressor : Quantile ensemble of extremely randomized @@ -1138,6 +1177,7 @@ def __init__( warm_start=False, ccp_alpha=0.0, max_samples=None, + monotonic_cst=None, ): """Initialize random forest quantile regressor.""" init_dict = { @@ -1164,6 +1204,8 @@ def __init__( "max_samples": max_samples, "max_samples_leaf": max_samples_leaf, } + if sklearn_version >= parse_version("1.4.0"): + init_dict["estimator_params"] += ("monotonic_cst",) super(RandomForestQuantileRegressor, self).__init__(**init_dict) self.default_quantiles = default_quantiles @@ -1177,6 +1219,7 @@ def __init__( self.max_leaf_nodes = max_leaf_nodes self.min_impurity_decrease = min_impurity_decrease self.ccp_alpha = ccp_alpha + self.monotonic_cst = monotonic_cst def _more_tags(self): return { @@ -1201,11 +1244,17 @@ class ExtraTreesQuantileRegressor(BaseForestQuantileRegressor): n_estimators : int, default=100 The number of trees in the forest. + .. sklearn-versionchanged:: 0.22 + The default value of `n_estimators` changed from 10 to 100 + in 0.22. + default_quantiles : float, list, or "mean", default=0.5 The default quantile or list of quantiles that the model tries to predict. Each quantile must be strictly between 0 and 1. If "mean", the model predicts the mean. + .. versionadded:: 1.2 + criterion : {"squared_error", "absolute_error", "friedman_mse", "poisson"}, \ default="squared_error" The function to measure the quality of a split. Supported criteria @@ -1232,6 +1281,9 @@ class ExtraTreesQuantileRegressor(BaseForestQuantileRegressor): `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. + .. sklearn-versionchanged:: 0.18 + Added float values for fractions. + min_samples_leaf : int or float, default=1 The minimum number of samples required to be at a leaf node. 
A split point at any depth will only be considered if it leaves at @@ -1244,6 +1296,9 @@ class ExtraTreesQuantileRegressor(BaseForestQuantileRegressor): `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. + .. sklearn-versionchanged:: 0.18 + Added float values for fractions. + max_samples_leaf : int, float or None, default=1 The maximum number of samples permitted to be at a leaf node. @@ -1273,6 +1328,9 @@ class ExtraTreesQuantileRegressor(BaseForestQuantileRegressor): The default of 1.0 is equivalent to bagged trees and more randomness can be achieved by setting smaller values, e.g. 0.3. + .. sklearn-versionchanged:: 1.1 + The default of `max_features` changed from `"auto"` to `"sqrt"`. + Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features. @@ -1298,6 +1356,8 @@ class ExtraTreesQuantileRegressor(BaseForestQuantileRegressor): ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. + .. sklearn-versionadded:: 0.19 + bootstrap : bool, default=False Whether bootstrap samples are used when building trees. If False, the whole dataset is used to build each tree. @@ -1336,6 +1396,8 @@ class ExtraTreesQuantileRegressor(BaseForestQuantileRegressor): subtree with the largest cost complexity that is smaller than ``ccp_alpha`` will be chosen. By default, no pruning is performed. + .. sklearn-versionadded:: 0.22 + max_samples : int or float, default=None If bootstrap is True, the number of samples to draw from X to train each base estimator. @@ -1357,12 +1419,17 @@ class ExtraTreesQuantileRegressor(BaseForestQuantileRegressor): - multioutput regressions (i.e. when `n_outputs_ > 1`), - regressions trained on data with missing values. + .. 
sklearn-versionadded:: 1.4 + Attributes ---------- estimator_ : :class:`~sklearn.tree.ExtraTreeRegressor` The child estimator template used to create the collection of fitted sub-estimators. + .. sklearn-versionadded:: 1.2 + `base_estimator_` was renamed to `estimator_`. + estimators_ : list of DecisionTreeRegressor The collection of fitted sub-estimators. @@ -1379,10 +1446,14 @@ class ExtraTreesQuantileRegressor(BaseForestQuantileRegressor): n_features_in_ : int Number of features seen during :term:`fit`. + .. sklearn-versionadded:: 0.24 + feature_names_in_ : ndarray of shape (`n_features_in_`,) Names of features seen during :term:`fit`. Defined only when `X` has feature names that are all strings. + .. sklearn-versionadded:: 1.0 + n_outputs_ : int The number of outputs. @@ -1398,6 +1469,8 @@ class ExtraTreesQuantileRegressor(BaseForestQuantileRegressor): The subset of drawn samples (i.e., the in-bag samples) for each base estimator. Each subset is defined by an array of the indices selected. + .. sklearn-versionadded:: 1.4 + See Also -------- RandomForestQuantileRegressor : Quantile ensemble regressor using trees. @@ -1453,6 +1526,7 @@ def __init__( warm_start=False, ccp_alpha=0.0, max_samples=None, + monotonic_cst=None, ): """Initialize extra trees quantile regressor.""" init_dict = { @@ -1479,6 +1553,8 @@ def __init__( "max_samples": max_samples, "max_samples_leaf": max_samples_leaf, } + if sklearn_version >= parse_version("1.4.0"): + init_dict["estimator_params"] += ("monotonic_cst",) super(ExtraTreesQuantileRegressor, self).__init__(**init_dict) self.default_quantiles = default_quantiles @@ -1492,6 +1568,7 @@ def __init__( self.max_leaf_nodes = max_leaf_nodes self.min_impurity_decrease = min_impurity_decrease self.ccp_alpha = ccp_alpha + self.monotonic_cst = monotonic_cst def _more_tags(self): return {