Skip to content

Commit

Permalink
Merge branch 'master' into documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
tsalo authored May 28, 2022
2 parents 4a1e4b6 + 131445c commit 15a7c33
Show file tree
Hide file tree
Showing 17 changed files with 462 additions and 54 deletions.
30 changes: 30 additions & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -115,3 +115,33 @@ API
stats.q_gen
stats.bonferroni
stats.fdr

:mod:`pymare.datasets`: Meta-analytic datasets
----------------------------------------------

.. automodule:: pymare.datasets
:no-members:
:no-inherited-members:

.. currentmodule:: pymare

.. autosummary::
:toctree: generated/
:template: function.rst

datasets.michael2013

:mod:`pymare.utils`: Miscellaneous utility functions
----------------------------------------------------

.. automodule:: pymare.utils
:no-members:
:no-inherited-members:

.. currentmodule:: pymare

.. autosummary::
:toctree: generated/
:template: function.rst

utils.get_resource_path
21 changes: 21 additions & 0 deletions docs/references.bib
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,19 @@ @article{kosmidis2017improving
doi={10.1093/biomet/asx001}
}

@article{michael2013non,
title={On the (non) persuasive power of a brain image},
author={Michael, Robert B and Newman, Eryn J and Vuorre, Matti and Cumming, Geoff and Garry, Maryanne},
journal={Psychonomic bulletin \& review},
volume={20},
number={4},
pages={720--725},
year={2013},
publisher={Springer},
url={https://doi.org/10.3758/s13423-013-0391-6},
doi={10.3758/s13423-013-0391-6}
}

@article{sangnawakij2019meta,
title={Meta-analysis without study-specific variance information: Heterogeneity case},
author={Sangnawakij, Patarawan and B{\"o}hning, Dankmar and Niwitpong, Sa-Aat and Adams, Stephen and Stanton, Michael and Holling, Heinz},
Expand Down Expand Up @@ -168,6 +181,14 @@ @article{viechtbauer2007confidence
doi={10.1002/sim.2514}
}

@Manual{white2022metadat,
title={metadat: Meta-Analysis Datasets},
author={Thomas White and Daniel Noble and Alistair Senior and W. Kyle Hamilton and Wolfgang Viechtbauer},
year={2022},
note={R package version 1.2-0},
url={https://CRAN.R-project.org/package=metadat}
}

@article{winkler2016non,
title={Non-parametric combination and related permutation tests for neuroimaging},
author={Winkler, Anderson M and Webster, Matthew A and Brooks, Jonathan C and Tracey, Irene and Smith, Stephen M and Nichols, Thomas E},
Expand Down
33 changes: 13 additions & 20 deletions examples/02_meta-analysis/plot_meta-analysis_walkthrough.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# emacs: -*- mode: python-mode; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
# ex: set sts=4 ts=4 sw=4 et:
r"""
.. _meta1:
.. _meta_walkthrough:
================================================
Run Estimators on a simulated dataset
Expand All @@ -24,18 +24,11 @@
import seaborn as sns
from scipy import stats

import pymare
from pymare import core, estimators
from pymare.stats import var_to_ci

sns.set_style("whitegrid")


# A small function to make things easier later on
def var_to_ci(y, v, n):
"""Convert sampling variance to 95% CI"""
term = 1.96 * np.sqrt(v) / np.sqrt(n)
return y - term, y + term


###############################################################################
# Here we simulate a dataset
# -----------------------------------------------------------------------------
Expand Down Expand Up @@ -115,7 +108,7 @@ def var_to_ci(y, v, n):
###############################################################################
# Create a Dataset object containing the data
# --------------------------------------------
dset = pymare.core.Dataset(y=y, X=None, v=v, n=n, add_intercept=True)
dset = core.Dataset(y=y, X=None, v=v, n=n, add_intercept=True)

# Here is a dictionary to house results across models
results = {}
Expand Down Expand Up @@ -152,13 +145,13 @@ def var_to_ci(y, v, n):
# The two combination models in PyMARE are Stouffer's and Fisher's Tests.
#
# Notice that these models don't use :class:`~pymare.core.Dataset` objects.
stouff = pymare.estimators.StoufferCombinationTest()
stouff = estimators.StoufferCombinationTest()
stouff.fit(z[:, None])
print("Stouffers")
print("p: {}".format(stouff.params_["p"]))
print()

fisher = pymare.estimators.FisherCombinationTest()
fisher = estimators.FisherCombinationTest()
fisher.fit(z[:, None])
print("Fishers")
print("p: {}".format(fisher.params_["p"]))
Expand All @@ -168,7 +161,7 @@ def var_to_ci(y, v, n):
# `````````````````````````````````````````````````````````````````````````````
# This estimator does not attempt to estimate between-study variance.
# Instead, it takes ``tau2`` (:math:`\tau^{2}`) as an argument.
wls = pymare.estimators.WeightedLeastSquares()
wls = estimators.WeightedLeastSquares()
wls.fit_dataset(dset)
wls_summary = wls.summary()
results["Weighted Least Squares"] = wls_summary.to_df()
Expand All @@ -186,31 +179,31 @@ def var_to_ci(y, v, n):
# estimating between-study variance, while ``VarianceBasedLikelihoodEstimator``
# can use either maximum-likelihood (ML) or restricted maximum-likelihood (REML)
# to iteratively estimate it.
dsl = pymare.estimators.DerSimonianLaird()
dsl = estimators.DerSimonianLaird()
dsl.fit_dataset(dset)
dsl_summary = dsl.summary()
results["DerSimonian-Laird"] = dsl_summary.to_df()
print("DerSimonian-Laird")
print(dsl_summary.to_df().T)
print()

hedge = pymare.estimators.Hedges()
hedge = estimators.Hedges()
hedge.fit_dataset(dset)
hedge_summary = hedge.summary()
results["Hedges"] = hedge_summary.to_df()
print("Hedges")
print(hedge_summary.to_df().T)
print()

vb_ml = pymare.estimators.VarianceBasedLikelihoodEstimator(method="ML")
vb_ml = estimators.VarianceBasedLikelihoodEstimator(method="ML")
vb_ml.fit_dataset(dset)
vb_ml_summary = vb_ml.summary()
results["Variance-Based with ML"] = vb_ml_summary.to_df()
print("Variance-Based with ML")
print(vb_ml_summary.to_df().T)
print()

vb_reml = pymare.estimators.VarianceBasedLikelihoodEstimator(method="REML")
vb_reml = estimators.VarianceBasedLikelihoodEstimator(method="REML")
vb_reml.fit_dataset(dset)
vb_reml_summary = vb_reml.summary()
results["Variance-Based with REML"] = vb_reml_summary.to_df()
Expand All @@ -221,15 +214,15 @@ def var_to_ci(y, v, n):
# The ``SampleSizeBasedLikelihoodEstimator`` estimates between-study variance
# using ``y`` and ``n``, but assumes within-study variance is homogenous
# across studies.
sb_ml = pymare.estimators.SampleSizeBasedLikelihoodEstimator(method="ML")
sb_ml = estimators.SampleSizeBasedLikelihoodEstimator(method="ML")
sb_ml.fit_dataset(dset)
sb_ml_summary = sb_ml.summary()
results["Sample Size-Based with ML"] = sb_ml_summary.to_df()
print("Sample Size-Based with ML")
print(sb_ml_summary.to_df().T)
print()

sb_reml = pymare.estimators.SampleSizeBasedLikelihoodEstimator(method="REML")
sb_reml = estimators.SampleSizeBasedLikelihoodEstimator(method="REML")
sb_reml.fit_dataset(dset)
sb_reml_summary = sb_reml.summary()
results["Sample Size-Based with REML"] = sb_reml_summary.to_df()
Expand Down
74 changes: 54 additions & 20 deletions examples/02_meta-analysis/plot_run_meta-analysis.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,68 @@
# emacs: -*- mode: python-mode; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
# ex: set sts=4 ts=4 sw=4 et:
"""
.. _meta2:
.. _meta_basics:
========================
Running a meta-analysis
========================
=====================================
The Basics of Running a Meta-Analysis
=====================================
PyMARE implements a range of meta-analytic estimators.
Here we walk through the basic steps of running a meta-analysis with PyMARE.
"""
###############################################################################
# Start with the necessary imports
# --------------------------------
import numpy as np
# -----------------------------------------------------------------------------
from pprint import pprint

from pymare import core, estimators
from pymare import core, datasets, estimators

###############################################################################
# Here we simulate a dataset
# -----------------------------------
n_studies = 100
v = np.random.random((n_studies))
y = np.random.random((n_studies))
X = np.random.random((n_studies, 5))
n = np.random.randint(5, 50, size=n_studies)
# Load the data
# -----------------------------------------------------------------------------
# We will use the :footcite:t:`michael2013non` dataset, which comes from the
# metadat library :footcite:p:`white2022metadat`.
#
# We only want to do a mean analysis, so we won't have any covariates except for
# an intercept.
data, meta = datasets.michael2013()
dset = core.Dataset(data=data, y="yi", v="vi", X=None, add_intercept=True)
dset.to_df()

###############################################################################
# Datasets can also be created from pandas DataFrames
# ---------------------------------------------------
dataset = core.Dataset(v=v, X=X, y=y, n=n)
est = estimators.WeightedLeastSquares().fit_dataset(dataset)
# Now we fit a model
# -----------------------------------------------------------------------------
# You must first initialize the estimator, after which you can use
# :meth:`~pymare.estimators.estimators.BaseEstimator.fit` to fit the model to
# numpy arrays, or
# :meth:`~pymare.estimators.estimators.BaseEstimator.fit_dataset` to fit it to
# a :class:`~pymare.core.Dataset`.
#
# The :meth:`~pymare.estimators.estimators.BaseEstimator.summary` function
# will return a :class:`~pymare.results.MetaRegressionResults` object,
# which contains the results of the analysis.
est = estimators.WeightedLeastSquares().fit_dataset(dset)
results = est.summary()
print(results.to_df())
results.to_df()

###############################################################################
# We can also extract some useful information from the results object
# -----------------------------------------------------------------------------
# The :meth:`~pymare.results.MetaRegressionResults.get_heterogeneity_stats`
# method will calculate heterogeneity statistics.
pprint(results.get_heterogeneity_stats())

###############################################################################
# The :meth:`~pymare.results.MetaRegressionResults.get_re_stats` method will
# estimate the confidence interval for :math:`\tau^2`.
pprint(results.get_re_stats())

###############################################################################
# The :meth:`~pymare.results.MetaRegressionResults.permutation_test` method
# will run a permutation test to estimate more accurate p-values.
perm_results = results.permutation_test(n_perm=1000)
perm_results.to_df()

###############################################################################
# References
# -----------------------------------------------------------------------------
# .. footbibliography::
70 changes: 66 additions & 4 deletions pymare/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import numpy as np
import pandas as pd

from pymare.utils import _listify

from .estimators import (
DerSimonianLaird,
Hedges,
Expand Down Expand Up @@ -65,12 +67,25 @@ def __init__(
"data argument."
)

if (X is None) and (not add_intercept):
raise ValueError("If no X matrix is provided, add_intercept must be True!")

# Extract columns from DataFrame
if data is not None:
y = data.loc[:, y or "y"].values
v = data.loc[:, v or "v"].values
X_names = X or "X"
X = data.loc[:, X_names].values

# v is optional
if (v is not None) or ("v" in data.columns):
v = data.loc[:, v or "v"].values

# X is optional
if (X is not None) or ("X" in data.columns):
X_names = X or "X"
X = data.loc[:, X_names].values

# n is optional
if (n is not None) or ("n" in data.columns):
n = data.loc[:, n or "n"].values

self.y = ensure_2d(y)
self.v = ensure_2d(v)
Expand All @@ -85,14 +100,61 @@ def _get_predictors(self, X, names, add_intercept):
"No fixed predictors found. If no X matrix is "
"provided, add_intercept must be True!"
)

X = pd.DataFrame(X)
if names is not None:
X.columns = names
X.columns = _listify(names)

if add_intercept:
intercept = pd.DataFrame({"intercept": np.ones(len(self.y))})
X = pd.concat([intercept, X], axis=1)

return X.values, X.columns.tolist()

def to_df(self):
"""Convert the dataset to a pandas DataFrame.
Returns
-------
:obj:`pandas.DataFrame`
A DataFrame containing the y, v, X, and n values.
"""
if self.y.shape[1] == 1:
df = pd.DataFrame({"y": self.y[:, 0]})

if self.v is not None:
df["v"] = self.v[:, 0]

if self.n is not None:
df["n"] = self.n[:, 0]

df[self.X_names] = self.X

else:
all_dfs = []
for i_set in range(self.y.shape[1]):
df = pd.DataFrame(
{
"set": np.full(self.y.shape[0], i_set),
"y": self.y[:, i_set],
}
)

if self.v is not None:
df["v"] = self.v[:, i_set]

if self.n is not None:
df["n"] = self.n[:, i_set]

# X is the same across sets
df[self.X_names] = self.X

all_dfs.append(df)

df = pd.concat(all_dfs, axis=0)

return df


def meta_regression(
y=None,
Expand Down
6 changes: 6 additions & 0 deletions pymare/datasets/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"""Open meta-analytic datasets."""
from .metadat import michael2013

__all__ = [
"michael2013",
]
Loading

0 comments on commit 15a7c33

Please sign in to comment.