diff --git a/docs/conf.py b/docs/conf.py index 11000d6..834e6ca 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -40,6 +40,7 @@ "sphinx_design", "sphinxcontrib.bibtex", "sphinxext_altair.altairplot", + "sphinxext.directives", "sphinxext.gallery", ] diff --git a/docs/sphinxext/directives.py b/docs/sphinxext/directives.py new file mode 100644 index 0000000..10a450a --- /dev/null +++ b/docs/sphinxext/directives.py @@ -0,0 +1,107 @@ +from docutils import nodes +from docutils.parsers.rst import Directive +from sphinx.locale import _ + + +def process_text(text): + """Process the text to identify and format literals.""" + parts = [] + start = 0 + while True: + start_literal = text.find("`", start) + if start_literal == -1: + parts.append(nodes.Text(text[start:])) + break + parts.append(nodes.Text(text[start:start_literal])) + end_literal = text.find("`", start_literal + 1) + if end_literal == -1: + break # unmatched backticks + literal_text = text[start_literal + 1 : end_literal] + parts.append(nodes.literal(literal_text, literal_text)) + start = end_literal + 1 + return parts + + +class div(nodes.General, nodes.Element): + @staticmethod + def visit_div(self, node): + self.body.append(self.starttag(node, "div")) + + @staticmethod + def depart_div(self, node=None): + self.body.append("</div>\n") + + +class span(nodes.Inline, nodes.TextElement): + @staticmethod + def visit_span(self, node): + self.body.append(self.starttag(node, "span", "")) + + @staticmethod + def depart_span(self, node=None): + self.body.append("</span>") + + +class SklearnVersionAddedDirective(Directive): + """Custom directive to denote the version additions to scikit-learn.""" + + required_arguments = 1 + optional_arguments = 0 + final_argument_whitespace = True + has_content = True + + def run(self): + text = None + if len(self.arguments[0].split("\n", 1)) > 1: + version, text = self.arguments[0].split("\n", 1) + else: + version = self.arguments[0] + container = div(classes=["versionadded"]) + paragraph = 
nodes.paragraph() + span_node = span( + "", + _(f"New in scikit-learn version {version}{'.' if text is None else ': '} "), + classes=["versionmodified", "added"], + ) + paragraph += span_node + if text is not None: + paragraph += process_text(text) + container += paragraph + self.state.nested_parse(self.content, self.content_offset, container) + return [container] + + +class SklearnVersionChangedDirective(Directive): + """Custom directive to denote the version changes to scikit-learn.""" + + required_arguments = 1 + optional_arguments = 0 + final_argument_whitespace = True + has_content = True + + def run(self): + text = None + if len(self.arguments[0].split("\n")) > 1: + version, text = self.arguments[0].split("\n", 1) + else: + version = self.arguments[0] + container = div(classes=["versionchanged"]) + paragraph = nodes.paragraph() + span_node = span( + "", + _(f"Changed in scikit-learn version {version}{'.' if text is None else ': '} "), + classes=["versionmodified", "changed"], + ) + paragraph += span_node + if text is not None: + paragraph += process_text(text) + container += paragraph + self.state.nested_parse(self.content, self.content_offset, container) + return [container] + + +def setup(app): + app.add_node(div, html=(div.visit_div, div.depart_div)) + app.add_node(span, html=(span.visit_span, span.depart_span)) + app.add_directive("sklearn-versionadded", SklearnVersionAddedDirective) + app.add_directive("sklearn-versionchanged", SklearnVersionChangedDirective) diff --git a/examples/plot_quantile_extrapolation.py b/examples/plot_quantile_extrapolation.py index b7803e7..b0a9a5a 100755 --- a/examples/plot_quantile_extrapolation.py +++ b/examples/plot_quantile_extrapolation.py @@ -319,7 +319,7 @@ def train_test_split(train_indices, **kwargs): def prob_randomized_pi(qmat, y, coverage): - """Calculate calibration probability""" + """Calculate calibration probability.""" alpha_included = np.mean((qmat[:, 0] <= y) & (y <= qmat[:, 1])) alpha_excluded = 
np.mean((qmat[:, 0] < y) & (y < qmat[:, 1])) if coverage <= alpha_excluded: diff --git a/quantile_forest/_quantile_forest.py b/quantile_forest/_quantile_forest.py index 6ff0777..7936ebf 100755 --- a/quantile_forest/_quantile_forest.py +++ b/quantile_forest/_quantile_forest.py @@ -875,11 +875,17 @@ class RandomForestQuantileRegressor(BaseForestQuantileRegressor): n_estimators : int, default=100 The number of trees in the forest. + .. sklearn-versionchanged:: 0.22 + The default value of `n_estimators` changed from 10 to 100 + in 0.22. + default_quantiles : float, list, or "mean", default=0.5 The default quantile or list of quantiles that the model tries to predict. Each quantile must be strictly between 0 and 1. If "mean", the model predicts the mean. + .. versionadded:: 1.2 + criterion : {"squared_error", "absolute_error", "friedman_mse", "poisson"}, \ default="squared_error" The function to measure the quality of a split. Supported criteria @@ -893,6 +899,12 @@ class RandomForestQuantileRegressor(BaseForestQuantileRegressor): Training using "absolute_error" is significantly slower than when using "squared_error". + .. sklearn-versionadded:: 0.18 + Mean Absolute Error (MAE) criterion. + + .. sklearn-versionadded:: 1.0 + Poisson criterion. + max_depth : int, default=None The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than @@ -906,6 +918,9 @@ class RandomForestQuantileRegressor(BaseForestQuantileRegressor): `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. + .. sklearn-versionchanged:: 0.18 + Added float values for fractions. + min_samples_leaf : int or float, default=1 The minimum number of samples required to be at a leaf node. 
A split point at any depth will only be considered if it leaves at @@ -918,6 +933,9 @@ class RandomForestQuantileRegressor(BaseForestQuantileRegressor): `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. + .. sklearn-versionchanged:: 0.18 + Added float values for fractions. + max_samples_leaf : int, float or None, default=1 The maximum number of samples permitted to be at a leaf node. @@ -947,6 +965,10 @@ class RandomForestQuantileRegressor(BaseForestQuantileRegressor): The default of 1.0 is equivalent to bagged trees and more randomness can be achieved by setting smaller values, e.g. 0.3. + .. sklearn-versionchanged:: 1.1 + The default of `max_features` changed from `"auto"` to 1.0. + + Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features. @@ -972,6 +994,8 @@ class RandomForestQuantileRegressor(BaseForestQuantileRegressor): ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. + .. sklearn-versionadded:: 0.19 + bootstrap : bool, default=True Whether bootstrap samples are used when building trees. If False, the whole dataset is used to build each tree. @@ -1007,6 +1031,8 @@ class RandomForestQuantileRegressor(BaseForestQuantileRegressor): subtree with the largest cost complexity that is smaller than ``ccp_alpha`` will be chosen. By default, no pruning is performed. + .. sklearn-versionadded:: 0.22 + max_samples : int or float, default=None If bootstrap is True, the number of samples to draw from X to train each base estimator. @@ -1016,6 +1042,8 @@ class RandomForestQuantileRegressor(BaseForestQuantileRegressor): - If float, then draw `max(round(n_samples * max_samples), 1)` samples. Thus, `max_samples` should be in the interval `(0.0, 1.0]`. + .. 
sklearn-versionadded:: 0.22 + monotonic_cst : array-like of int of shape (n_features), default=None Indicates the monotonicity constraint to enforce on each feature. - 1: monotonically increasing @@ -1028,12 +1056,17 @@ class RandomForestQuantileRegressor(BaseForestQuantileRegressor): - multioutput regressions (i.e. when `n_outputs_ > 1`), - regressions trained on data with missing values. + .. sklearn-versionadded:: 1.4 + Attributes ---------- estimator_ : :class:`~sklearn.tree.DecisionTreeRegressor` The child estimator template used to create the collection of fitted sub-estimators. + .. sklearn-versionadded:: 1.2 + `base_estimator_` was renamed to `estimator_`. + estimators_ : list of DecisionTreeRegressor The collection of fitted sub-estimators. @@ -1050,10 +1083,14 @@ class RandomForestQuantileRegressor(BaseForestQuantileRegressor): n_features_in_ : int Number of features seen during :term:`fit`. + .. sklearn-versionadded:: 0.24 + feature_names_in_ : ndarray of shape (`n_features_in_`,) Names of features seen during :term:`fit`. Defined only when `X` has feature names that are all strings. + .. sklearn-versionadded:: 1.0 + n_outputs_ : int The number of outputs when ``fit`` is performed. @@ -1069,6 +1106,8 @@ class RandomForestQuantileRegressor(BaseForestQuantileRegressor): The subset of drawn samples (i.e., the in-bag samples) for each base estimator. Each subset is defined by an array of the indices selected. + .. 
sklearn-versionadded:: 1.4 + See Also -------- ExtraTreesQuantileRegressor : Quantile ensemble of extremely randomized @@ -1138,6 +1177,7 @@ def __init__( warm_start=False, ccp_alpha=0.0, max_samples=None, + monotonic_cst=None, ): """Initialize random forest quantile regressor.""" init_dict = { @@ -1164,6 +1204,8 @@ def __init__( "max_samples": max_samples, "max_samples_leaf": max_samples_leaf, } + if sklearn_version >= parse_version("1.4.0"): + init_dict["estimator_params"] += ("monotonic_cst",) super(RandomForestQuantileRegressor, self).__init__(**init_dict) self.default_quantiles = default_quantiles @@ -1177,6 +1219,7 @@ def __init__( self.max_leaf_nodes = max_leaf_nodes self.min_impurity_decrease = min_impurity_decrease self.ccp_alpha = ccp_alpha + self.monotonic_cst = monotonic_cst def _more_tags(self): return { @@ -1201,11 +1244,17 @@ class ExtraTreesQuantileRegressor(BaseForestQuantileRegressor): n_estimators : int, default=100 The number of trees in the forest. + .. sklearn-versionchanged:: 0.22 + The default value of `n_estimators` changed from 10 to 100 + in 0.22. + default_quantiles : float, list, or "mean", default=0.5 The default quantile or list of quantiles that the model tries to predict. Each quantile must be strictly between 0 and 1. If "mean", the model predicts the mean. + .. versionadded:: 1.2 + criterion : {"squared_error", "absolute_error", "friedman_mse", "poisson"}, \ default="squared_error" The function to measure the quality of a split. Supported criteria @@ -1232,6 +1281,9 @@ class ExtraTreesQuantileRegressor(BaseForestQuantileRegressor): `ceil(min_samples_split * n_samples)` are the minimum number of samples for each split. + .. sklearn-versionchanged:: 0.18 + Added float values for fractions. + min_samples_leaf : int or float, default=1 The minimum number of samples required to be at a leaf node. 
A split point at any depth will only be considered if it leaves at @@ -1244,6 +1296,9 @@ class ExtraTreesQuantileRegressor(BaseForestQuantileRegressor): `ceil(min_samples_leaf * n_samples)` are the minimum number of samples for each node. + .. sklearn-versionchanged:: 0.18 + Added float values for fractions. + max_samples_leaf : int, float or None, default=1 The maximum number of samples permitted to be at a leaf node. @@ -1273,6 +1328,9 @@ class ExtraTreesQuantileRegressor(BaseForestQuantileRegressor): The default of 1.0 is equivalent to bagged trees and more randomness can be achieved by setting smaller values, e.g. 0.3. + .. sklearn-versionchanged:: 1.1 + The default of `max_features` changed from `"auto"` to `"sqrt"`. + Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features. @@ -1298,6 +1356,8 @@ class ExtraTreesQuantileRegressor(BaseForestQuantileRegressor): ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. + .. sklearn-versionadded:: 0.19 + bootstrap : bool, default=False Whether bootstrap samples are used when building trees. If False, the whole dataset is used to build each tree. @@ -1336,6 +1396,8 @@ class ExtraTreesQuantileRegressor(BaseForestQuantileRegressor): subtree with the largest cost complexity that is smaller than ``ccp_alpha`` will be chosen. By default, no pruning is performed. + .. sklearn-versionadded:: 0.22 + max_samples : int or float, default=None If bootstrap is True, the number of samples to draw from X to train each base estimator. @@ -1357,12 +1419,17 @@ class ExtraTreesQuantileRegressor(BaseForestQuantileRegressor): - multioutput regressions (i.e. when `n_outputs_ > 1`), - regressions trained on data with missing values. + .. 
sklearn-versionadded:: 1.4 + Attributes ---------- estimator_ : :class:`~sklearn.tree.ExtraTreeRegressor` The child estimator template used to create the collection of fitted sub-estimators. + .. sklearn-versionadded:: 1.2 + `base_estimator_` was renamed to `estimator_`. + estimators_ : list of DecisionTreeRegressor The collection of fitted sub-estimators. @@ -1379,10 +1446,14 @@ class ExtraTreesQuantileRegressor(BaseForestQuantileRegressor): n_features_in_ : int Number of features seen during :term:`fit`. + .. sklearn-versionadded:: 0.24 + feature_names_in_ : ndarray of shape (`n_features_in_`,) Names of features seen during :term:`fit`. Defined only when `X` has feature names that are all strings. + .. sklearn-versionadded:: 1.0 + n_outputs_ : int The number of outputs. @@ -1398,6 +1469,8 @@ class ExtraTreesQuantileRegressor(BaseForestQuantileRegressor): The subset of drawn samples (i.e., the in-bag samples) for each base estimator. Each subset is defined by an array of the indices selected. + .. sklearn-versionadded:: 1.4 + See Also -------- RandomForestQuantileRegressor : Quantile ensemble regressor using trees. @@ -1453,6 +1526,7 @@ def __init__( warm_start=False, ccp_alpha=0.0, max_samples=None, + monotonic_cst=None, ): """Initialize extra trees quantile regressor.""" init_dict = { @@ -1479,6 +1553,8 @@ def __init__( "max_samples": max_samples, "max_samples_leaf": max_samples_leaf, } + if sklearn_version >= parse_version("1.4.0"): + init_dict["estimator_params"] += ("monotonic_cst",) super(ExtraTreesQuantileRegressor, self).__init__(**init_dict) self.default_quantiles = default_quantiles @@ -1492,6 +1568,7 @@ def __init__( self.max_leaf_nodes = max_leaf_nodes self.min_impurity_decrease = min_impurity_decrease self.ccp_alpha = ccp_alpha + self.monotonic_cst = monotonic_cst def _more_tags(self): return {