Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow Results objects to work with Estimators' fit() method #104

Merged
merged 6 commits into from
Jun 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pymare/estimators/combination.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ def _z_to_p(self, z):

def fit(self, z, *args, **kwargs):
"""Fit the estimator to z-values."""
# This resets the Estimator's dataset_ attribute. fit_dataset will overwrite if called.
self.dataset_ = None

if self.mode == "concordant":
ose = self.__class__(mode="directed")
p1 = ose.p_value(z, *args, **kwargs)
Expand Down
98 changes: 93 additions & 5 deletions pymare/estimators/estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,24 @@ def __init__(self, tau2=0.0):
self.tau2 = tau2

def fit(self, y, X, v=None):
"""Fit the estimator to data."""
"""Fit the estimator to data.

Parameters
----------
y : :obj:`numpy.ndarray` of shape (n, d)
The dependent variable(s) (y).
X : :obj:`numpy.ndarray` of shape (n, p)
The independent variable(s) (X).
v : :obj:`numpy.ndarray` of shape (n, d), optional
Sampling variances. If not provided, unit weights will be used.

Returns
-------
:obj:`~pymare.estimators.WeightedLeastSquares`
"""
# This resets the Estimator's dataset_ attribute. fit_dataset will overwrite if called.
self.dataset_ = None

if v is None:
v = np.ones_like(y)

Expand Down Expand Up @@ -219,7 +236,24 @@ class DerSimonianLaird(BaseEstimator):
"""

def fit(self, y, v, X):
"""Fit the estimator to data."""
"""Fit the estimator to data.

Parameters
----------
y : :obj:`numpy.ndarray` of shape (n, d)
The dependent variable(s) (y).
v : :obj:`numpy.ndarray` of shape (n, d)
Sampling variances.
X : :obj:`numpy.ndarray` of shape (n, p)
The independent variable(s) (X).

Returns
-------
:obj:`~pymare.estimators.DerSimonianLaird`
"""
# This resets the Estimator's dataset_ attribute. fit_dataset will overwrite if called.
self.dataset_ = None

y = ensure_2d(y)
v = ensure_2d(v)

Expand Down Expand Up @@ -266,7 +300,24 @@ class Hedges(BaseEstimator):
"""

def fit(self, y, v, X):
"""Fit the estimator to data."""
"""Fit the estimator to data.

Parameters
----------
y : :obj:`numpy.ndarray` of shape (n, d)
The dependent variable(s) (y).
v : :obj:`numpy.ndarray` of shape (n, d)
Sampling variances.
X : :obj:`numpy.ndarray` of shape (n, p)
The independent variable(s) (X).

Returns
-------
:obj:`~pymare.estimators.Hedges`
"""
# This resets the Estimator's dataset_ attribute. fit_dataset will overwrite if called.
self.dataset_ = None

k, p = X.shape[:2]
_unit_v = np.ones_like(y)
beta, inv_cov = weighted_least_squares(y, _unit_v, X, return_cov=True)
Expand Down Expand Up @@ -317,7 +368,24 @@ def __init__(self, method="ml", **kwargs):

@_loopable
def fit(self, y, v, X):
"""Fit the estimator to data."""
"""Fit the estimator to data.

Parameters
----------
y : :obj:`numpy.ndarray` of shape (n, d)
The dependent variable(s) (y).
v : :obj:`numpy.ndarray` of shape (n, d)
Sampling variances.
X : :obj:`numpy.ndarray` of shape (n, p)
The independent variable(s) (X).

Returns
-------
:obj:`~pymare.estimators.VarianceBasedLikelihoodEstimator`
"""
# This resets the Estimator's dataset_ attribute. fit_dataset will overwrite if called.
self.dataset_ = None

# use D-L estimate for initial values
est_DL = DerSimonianLaird().fit(y, v, X).params_
beta = est_DL["fe_params"]
Expand Down Expand Up @@ -393,7 +461,24 @@ def __init__(self, method="ml", **kwargs):

@_loopable
def fit(self, y, n, X):
"""Fit the estimator to data."""
"""Fit the estimator to data.

Parameters
----------
y : :obj:`numpy.ndarray` of shape (n, d)
The dependent variable(s) (y).
n : :obj:`numpy.ndarray` of shape (n, d)
Sample sizes.
X : :obj:`numpy.ndarray` of shape (n, p)
The independent variable(s) (X).

Returns
-------
:obj:`~pymare.estimators.SampleSizeBasedLikelihoodEstimator`
"""
# This resets the Estimator's dataset_ attribute. fit_dataset will overwrite if called.
self.dataset_ = None

if n.std() < np.sqrt(np.finfo(float).eps):
raise ValueError(
"Sample size-based likelihood estimator cannot "
Expand Down Expand Up @@ -550,6 +635,9 @@ def fit(self, y, v, X, groups=None):
`groups` argument can be used to specify the nesting structure
(i.e., which rows in `y`, `v`, and `X` belong to each study).
"""
# This resets the Estimator's dataset_ attribute. fit_dataset will overwrite if called.
self.dataset_ = None

if y.ndim > 1 and y.shape[1] > 1:
raise ValueError(
"The StanMetaRegression estimator currently does "
Expand Down
46 changes: 46 additions & 0 deletions pymare/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,12 @@ class MetaRegressionResults:
tau2 : None or :obj:`numpy.ndarray` of shape (d,) or :obj:`float`, optional
A 1-d array containing the estimated tau^2 value for each parallel dataset
(or a float, for a single dataset). May be omitted by fixed-effects estimators.

Warning
-------
When an Estimator is fitted to arrays directly using the ``fit`` method, the Results object's
utility is limited.
Many methods will not work.
"""

def __init__(self, estimator, dataset, fe_params, fe_cov, tau2=None):
Expand Down Expand Up @@ -92,6 +98,11 @@ def get_fe_stats(self, alpha=0.05):
def get_re_stats(self, method="QP", alpha=0.05):
"""Get random-effect statistics.

.. warning::

This method relies on the ``.dataset`` attribute, so the original Estimator must have
be fitted with ``fit_dataset``, not ``fit``.

Parameters
----------
method : {"QP"}, optional
Expand All @@ -115,6 +126,9 @@ def get_re_stats(self, method="QP", alpha=0.05):
----------
.. footbibliography::
"""
if self.dataset is None:
raise ValueError("The Dataset is unavailable. This method requires a Dataset.")

if method == "QP":
n_datasets = np.atleast_2d(self.tau2).shape[1]
if n_datasets > 10:
Expand Down Expand Up @@ -157,6 +171,11 @@ def get_re_stats(self, method="QP", alpha=0.05):
def get_heterogeneity_stats(self):
"""Get heterogeneity statistics.

.. warning::

This method relies on the ``.dataset`` attribute, so the original Estimator must have
be fitted with ``fit_dataset``, not ``fit``.

Returns
-------
:obj:`dict`
Expand All @@ -180,6 +199,9 @@ def get_heterogeneity_stats(self):
----------
.. footbibliography::
"""
if self.dataset is None:
raise ValueError("The Dataset is unavailable. This method requires a Dataset.")

v = self.estimator.get_v(self.dataset)
q_fe = q_gen(self.dataset.y, v, self.dataset.X, 0)
df = self.dataset.y.shape[0] - self.dataset.X.shape[1]
Expand All @@ -195,6 +217,11 @@ def to_df(self, alpha=0.05):

This method only works for one-dimensional results.

.. warning::

This method relies on the ``.dataset`` attribute, so the original Estimator must have
be fitted with ``fit_dataset``, not ``fit``.

Parameters
----------
alpha : :obj:`float`, optional
Expand All @@ -217,6 +244,9 @@ def to_df(self, alpha=0.05):
the CI columns will be ``"ci_0.025"`` and ``"ci_0.975"``.
=========== ==========================================================================
"""
if self.dataset is None:
raise ValueError("The Dataset is unavailable. This method requires a Dataset.")

b_shape = self.fe_params.shape
if len(b_shape) > 1 and b_shape[1] > 1:
raise ValueError(
Expand All @@ -237,6 +267,11 @@ def to_df(self, alpha=0.05):
def permutation_test(self, n_perm=1000):
"""Run permutation test.

.. warning::

This method relies on the ``.dataset`` attribute, so the original Estimator must have
be fitted with ``fit_dataset``, not ``fit``.

Parameters
----------
n_perm : :obj:`int`, optional
Expand All @@ -260,6 +295,9 @@ def permutation_test(self, n_perm=1000):
This means that one can often set very high n_perm values (e.g., 100k) with little
performance degradation.
"""
if self.dataset is None:
raise ValueError("The Dataset is unavailable. This method requires a Dataset.")

n_obs, n_datasets = self.dataset.y.shape
has_mods = self.dataset.X.shape[1] > 1

Expand Down Expand Up @@ -381,6 +419,11 @@ def p(self):
def permutation_test(self, n_perm=1000):
"""Run permutation test.

.. warning::

This method relies on the ``.dataset`` attribute, so the original Estimator must have
be fitted with ``fit_dataset``, not ``fit``.

Parameters
----------
n_perm : :obj:`int`, optional
Expand All @@ -403,6 +446,9 @@ def permutation_test(self, n_perm=1000):
set very high n_perm values (e.g., 100k) with little performance
degradation.
"""
if self.dataset is None:
raise ValueError("The Dataset is unavailable. This method requires a Dataset.")

n_obs, n_datasets = self.dataset.y.shape

# create results arrays
Expand Down
65 changes: 65 additions & 0 deletions pymare/tests/test_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,71 @@ def results_2d(fitted_estimator, dataset_2d):
return est.fit_dataset(dataset_2d).summary()


def test_meta_regression_results_from_arrays(dataset):
"""Ensure that a MetaRegressionResults can be created from arrays.

This is a regression test for a bug that caused the MetaRegressionResults
to fail when Estimators were fitted to arrays instead of Datasets.
See https://github.com/neurostuff/PyMARE/issues/52 for more info.
"""
est = DerSimonianLaird()
fitted_estimator = est.fit(y=dataset.y, X=dataset.X, v=dataset.v)
results = fitted_estimator.summary()
assert isinstance(results, MetaRegressionResults)
assert results.fe_params.shape == (2, 1)
assert results.fe_cov.shape == (2, 2, 1)
assert results.tau2.shape == (1,)

# fit overwrites dataset_ attribute with None
assert fitted_estimator.dataset_ is None
# fit_dataset overwrites it with the Dataset
fitted_estimator.fit_dataset(dataset)
assert isinstance(fitted_estimator.dataset_, Dataset)
# fit sets it back to None
fitted_estimator.fit(y=dataset.y, X=dataset.X, v=dataset.v)
assert fitted_estimator.dataset_ is None

# Some methods are not available if fit was used
results = fitted_estimator.summary()
with pytest.raises(ValueError):
results.get_re_stats()

with pytest.raises(ValueError):
results.get_heterogeneity_stats()

with pytest.raises(ValueError):
results.to_df()

with pytest.raises(ValueError):
results.permutation_test(1000)


def test_combination_test_results_from_arrays(dataset):
"""Ensure that a CombinationTestResults can be created from arrays.

This is a regression test for a bug that caused the MetaRegressionResults
to fail when Estimators were fitted to arrays instead of Datasets.
See https://github.com/neurostuff/PyMARE/issues/52 for more info.
"""
fitted_estimator = StoufferCombinationTest().fit(z=dataset.y)
results = fitted_estimator.summary()
assert isinstance(results, CombinationTestResults)
assert results.p.shape == (1,)

# fit overwrites dataset_ attribute with None
assert fitted_estimator.dataset_ is None
# fit_dataset overwrites it with the Dataset
fitted_estimator.fit_dataset(dataset)
assert isinstance(fitted_estimator.dataset_, Dataset)
# fit sets it back to None
fitted_estimator.fit(z=dataset.y)
assert fitted_estimator.dataset_ is None

# Some methods are not available if fit was used
with pytest.raises(ValueError):
fitted_estimator.summary().permutation_test(1000)


def test_meta_regression_results_init_1d(fitted_estimator):
"""Test MetaRegressionResults from 1D data."""
est = fitted_estimator
Expand Down