Merge branch 'master' into documentation

neurostuff · May 28, 2022 · 15a7c33 · 15a7c33
2 parents 4a1e4b6 + 131445c
commit 15a7c33
Show file tree

Hide file tree

Showing 17 changed files with 462 additions and 54 deletions.
diff --git a/docs/api.rst b/docs/api.rst
@@ -115,3 +115,33 @@ API
    stats.q_gen
    stats.bonferroni
    stats.fdr
+
+:mod:`pymare.datasets`: Meta-analytic datasets
+----------------------------------------------
+
+.. automodule:: pymare.datasets
+   :no-members:
+   :no-inherited-members:
+
+.. currentmodule:: pymare
+
+.. autosummary::
+   :toctree: generated/
+   :template: function.rst
+
+   datasets.michael2013
+
+:mod:`pymare.utils`: Miscellaneous utility functions
+----------------------------------------------------
+
+.. automodule:: pymare.utils
+   :no-members:
+   :no-inherited-members:
+
+.. currentmodule:: pymare
+
+.. autosummary::
+   :toctree: generated/
+   :template: function.rst
+
+   utils.get_resource_path
diff --git a/docs/references.bib b/docs/references.bib
@@ -110,6 +110,19 @@ @article{kosmidis2017improving
   doi={10.1093/biomet/asx001}
 }
 
+@article{michael2013non,
+  title={On the (non) persuasive power of a brain image},
+  author={Michael, Robert B and Newman, Eryn J and Vuorre, Matti and Cumming, Geoff and Garry, Maryanne},
+  journal={Psychonomic bulletin \& review},
+  volume={20},
+  number={4},
+  pages={720--725},
+  year={2013},
+  publisher={Springer},
+  url={https://doi.org/10.3758/s13423-013-0391-6},
+  doi={10.3758/s13423-013-0391-6}
+}
+
 @article{sangnawakij2019meta,
   title={Meta-analysis without study-specific variance information: Heterogeneity case},
   author={Sangnawakij, Patarawan and B{\"o}hning, Dankmar and Niwitpong, Sa-Aat and Adams, Stephen and Stanton, Michael and Holling, Heinz},
@@ -168,6 +181,14 @@ @article{viechtbauer2007confidence
   doi={10.1002/sim.2514}
 }
 
+@Manual{white2022metadat,
+  title={metadat: Meta-Analysis Datasets},
+  author={Thomas White and Daniel Noble and Alistair Senior and W. Kyle Hamilton and Wolfgang Viechtbauer},
+  year={2022},
+  note={R package version 1.2-0},
+  url={https://CRAN.R-project.org/package=metadat}
+}
+
 @article{winkler2016non,
   title={Non-parametric combination and related permutation tests for neuroimaging},
   author={Winkler, Anderson M and Webster, Matthew A and Brooks, Jonathan C and Tracey, Irene and Smith, Stephen M and Nichols, Thomas E},

diff --git a/examples/02_meta-analysis/plot_meta-analysis_walkthrough.py b/examples/02_meta-analysis/plot_meta-analysis_walkthrough.py
@@ -1,7 +1,7 @@
 # emacs: -*- mode: python-mode; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
 # ex: set sts=4 ts=4 sw=4 et:
 r"""
-.. _meta1:
+.. _meta_walkthrough:
 
 ================================================
  Run Estimators on a simulated dataset
@@ -24,18 +24,11 @@
 import seaborn as sns
 from scipy import stats
 
-import pymare
+from pymare import core, estimators
+from pymare.stats import var_to_ci
 
 sns.set_style("whitegrid")
 
-
-# A small function to make things easier later on
-def var_to_ci(y, v, n):
-    """Convert sampling variance to 95% CI"""
-    term = 1.96 * np.sqrt(v) / np.sqrt(n)
-    return y - term, y + term
-
-
 ###############################################################################
 # Here we simulate a dataset
 # -----------------------------------------------------------------------------
@@ -115,7 +108,7 @@ def var_to_ci(y, v, n):
 ###############################################################################
 # Create a Dataset object containing the data
 # --------------------------------------------
-dset = pymare.core.Dataset(y=y, X=None, v=v, n=n, add_intercept=True)
+dset = core.Dataset(y=y, X=None, v=v, n=n, add_intercept=True)
 
 # Here is a dictionary to house results across models
 results = {}
@@ -152,13 +145,13 @@ def var_to_ci(y, v, n):
 # The two combination models in PyMARE are Stouffer's and Fisher's Tests.
 #
 # Notice that these models don't use :class:`~pymare.core.Dataset` objects.
-stouff = pymare.estimators.StoufferCombinationTest()
+stouff = estimators.StoufferCombinationTest()
 stouff.fit(z[:, None])
 print("Stouffers")
 print("p: {}".format(stouff.params_["p"]))
 print()
 
-fisher = pymare.estimators.FisherCombinationTest()
+fisher = estimators.FisherCombinationTest()
 fisher.fit(z[:, None])
 print("Fishers")
 print("p: {}".format(fisher.params_["p"]))
@@ -168,7 +161,7 @@ def var_to_ci(y, v, n):
 # `````````````````````````````````````````````````````````````````````````````
 # This estimator does not attempt to estimate between-study variance.
 # Instead, it takes ``tau2`` (:math:`\tau^{2}`) as an argument.
-wls = pymare.estimators.WeightedLeastSquares()
+wls = estimators.WeightedLeastSquares()
 wls.fit_dataset(dset)
 wls_summary = wls.summary()
 results["Weighted Least Squares"] = wls_summary.to_df()
@@ -186,31 +179,31 @@ def var_to_ci(y, v, n):
 # estimating between-study variance, while ``VarianceBasedLikelihoodEstimator``
 # can use either maximum-likelihood (ML) or restricted maximum-likelihood (REML)
 # to iteratively estimate it.
-dsl = pymare.estimators.DerSimonianLaird()
+dsl = estimators.DerSimonianLaird()
 dsl.fit_dataset(dset)
 dsl_summary = dsl.summary()
 results["DerSimonian-Laird"] = dsl_summary.to_df()
 print("DerSimonian-Laird")
 print(dsl_summary.to_df().T)
 print()
 
-hedge = pymare.estimators.Hedges()
+hedge = estimators.Hedges()
 hedge.fit_dataset(dset)
 hedge_summary = hedge.summary()
 results["Hedges"] = hedge_summary.to_df()
 print("Hedges")
 print(hedge_summary.to_df().T)
 print()
 
-vb_ml = pymare.estimators.VarianceBasedLikelihoodEstimator(method="ML")
+vb_ml = estimators.VarianceBasedLikelihoodEstimator(method="ML")
 vb_ml.fit_dataset(dset)
 vb_ml_summary = vb_ml.summary()
 results["Variance-Based with ML"] = vb_ml_summary.to_df()
 print("Variance-Based with ML")
 print(vb_ml_summary.to_df().T)
 print()
 
-vb_reml = pymare.estimators.VarianceBasedLikelihoodEstimator(method="REML")
+vb_reml = estimators.VarianceBasedLikelihoodEstimator(method="REML")
 vb_reml.fit_dataset(dset)
 vb_reml_summary = vb_reml.summary()
 results["Variance-Based with REML"] = vb_reml_summary.to_df()
@@ -221,15 +214,15 @@ def var_to_ci(y, v, n):
 # The ``SampleSizeBasedLikelihoodEstimator`` estimates between-study variance
 # using ``y`` and ``n``, but assumes within-study variance is homogenous
 # across studies.
-sb_ml = pymare.estimators.SampleSizeBasedLikelihoodEstimator(method="ML")
+sb_ml = estimators.SampleSizeBasedLikelihoodEstimator(method="ML")
 sb_ml.fit_dataset(dset)
 sb_ml_summary = sb_ml.summary()
 results["Sample Size-Based with ML"] = sb_ml_summary.to_df()
 print("Sample Size-Based with ML")
 print(sb_ml_summary.to_df().T)
 print()
 
-sb_reml = pymare.estimators.SampleSizeBasedLikelihoodEstimator(method="REML")
+sb_reml = estimators.SampleSizeBasedLikelihoodEstimator(method="REML")
 sb_reml.fit_dataset(dset)
 sb_reml_summary = sb_reml.summary()
 results["Sample Size-Based with REML"] = sb_reml_summary.to_df()

diff --git a/examples/02_meta-analysis/plot_run_meta-analysis.py b/examples/02_meta-analysis/plot_run_meta-analysis.py
@@ -1,34 +1,68 @@
 # emacs: -*- mode: python-mode; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
 # ex: set sts=4 ts=4 sw=4 et:
 """
-.. _meta2:
+.. _meta_basics:
 
-========================
- Running a meta-analysis
-========================
+=====================================
+The Basics of Running a Meta-Analysis
+=====================================
 
-PyMARE implements a range of meta-analytic estimators.
+Here we walk through the basic steps of running a meta-analysis with PyMARE.
 """
 ###############################################################################
 # Start with the necessary imports
-# --------------------------------
-import numpy as np
+# -----------------------------------------------------------------------------
+from pprint import pprint
 
-from pymare import core, estimators
+from pymare import core, datasets, estimators
 
 ###############################################################################
-# Here we simulate a dataset
-# -----------------------------------
-n_studies = 100
-v = np.random.random((n_studies))
-y = np.random.random((n_studies))
-X = np.random.random((n_studies, 5))
-n = np.random.randint(5, 50, size=n_studies)
+# Load the data
+# -----------------------------------------------------------------------------
+# We will use the :footcite:t:`michael2013non` dataset, which comes from the
+# metadat library :footcite:p:`white2022metadat`.
+#
+# We only want to do a mean analysis, so we won't have any covariates except for
+# an intercept.
+data, meta = datasets.michael2013()
+dset = core.Dataset(data=data, y="yi", v="vi", X=None, add_intercept=True)
+dset.to_df()
 
 ###############################################################################
-# Datasets can also be created from pandas DataFrames
-# ---------------------------------------------------
-dataset = core.Dataset(v=v, X=X, y=y, n=n)
-est = estimators.WeightedLeastSquares().fit_dataset(dataset)
+# Now we fit a model
+# -----------------------------------------------------------------------------
+# You must first initialize the estimator, after which you can use
+# :meth:`~pymare.estimators.estimators.BaseEstimator.fit` to fit the model to
+# numpy arrays, or
+# :meth:`~pymare.estimators.estimators.BaseEstimator.fit_dataset` to fit it to
+# a :class:`~pymare.core.Dataset`.
+#
+# The :meth:`~pymare.estimators.estimators.BaseEstimator.summary` function
+# will return a :class:`~pymare.results.MetaRegressionResults` object,
+# which contains the results of the analysis.
+est = estimators.WeightedLeastSquares().fit_dataset(dset)
 results = est.summary()
-print(results.to_df())
+results.to_df()
+
+###############################################################################
+# We can also extract some useful information from the results object
+# -----------------------------------------------------------------------------
+# The :meth:`~pymare.results.MetaRegressionResults.get_heterogeneity_stats`
+# method will calculate heterogeneity statistics.
+pprint(results.get_heterogeneity_stats())
+
+###############################################################################
+# The :meth:`~pymare.results.MetaRegressionResults.get_re_stats` method will
+# estimate the confidence interval for :math:`\tau^2`.
+pprint(results.get_re_stats())
+
+###############################################################################
+# The :meth:`~pymare.results.MetaRegressionResults.permutation_test` method
+# will run a permutation test to estimate more accurate p-values.
+perm_results = results.permutation_test(n_perm=1000)
+perm_results.to_df()
+
+###############################################################################
+# References
+# -----------------------------------------------------------------------------
+# .. footbibliography::
diff --git a/pymare/core.py b/pymare/core.py
@@ -5,6 +5,8 @@
 import numpy as np
 import pandas as pd
 
+from pymare.utils import _listify
+
 from .estimators import (
     DerSimonianLaird,
     Hedges,
@@ -65,12 +67,25 @@ def __init__(
                 "data argument."
             )
 
+        if (X is None) and (not add_intercept):
+            raise ValueError("If no X matrix is provided, add_intercept must be True!")
+
         # Extract columns from DataFrame
         if data is not None:
             y = data.loc[:, y or "y"].values
-            v = data.loc[:, v or "v"].values
-            X_names = X or "X"
-            X = data.loc[:, X_names].values
+
+            # v is optional
+            if (v is not None) or ("v" in data.columns):
+                v = data.loc[:, v or "v"].values
+
+            # X is optional
+            if (X is not None) or ("X" in data.columns):
+                X_names = X or "X"
+                X = data.loc[:, X_names].values
+
+            # n is optional
+            if (n is not None) or ("n" in data.columns):
+                n = data.loc[:, n or "n"].values
 
         self.y = ensure_2d(y)
         self.v = ensure_2d(v)
@@ -85,14 +100,61 @@ def _get_predictors(self, X, names, add_intercept):
                 "No fixed predictors found. If no X matrix is "
                 "provided, add_intercept must be True!"
             )
+
         X = pd.DataFrame(X)
         if names is not None:
-            X.columns = names
+            X.columns = _listify(names)
+
         if add_intercept:
             intercept = pd.DataFrame({"intercept": np.ones(len(self.y))})
             X = pd.concat([intercept, X], axis=1)
+
         return X.values, X.columns.tolist()
 
+    def to_df(self):
+        """Convert the dataset to a pandas DataFrame.
+
+        Returns
+        -------
+        :obj:`pandas.DataFrame`
+            A DataFrame containing the y, v, X, and n values.
+        """
+        if self.y.shape[1] == 1:
+            df = pd.DataFrame({"y": self.y[:, 0]})
+
+            if self.v is not None:
+                df["v"] = self.v[:, 0]
+
+            if self.n is not None:
+                df["n"] = self.n[:, 0]
+
+            df[self.X_names] = self.X
+
+        else:
+            all_dfs = []
+            for i_set in range(self.y.shape[1]):
+                df = pd.DataFrame(
+                    {
+                        "set": np.full(self.y.shape[0], i_set),
+                        "y": self.y[:, i_set],
+                    }
+                )
+
+                if self.v is not None:
+                    df["v"] = self.v[:, i_set]
+
+                if self.n is not None:
+                    df["n"] = self.n[:, i_set]
+
+                # X is the same across sets
+                df[self.X_names] = self.X
+
+                all_dfs.append(df)
+
+            df = pd.concat(all_dfs, axis=0)
+
+        return df
+
 
 def meta_regression(
     y=None,

diff --git a/pymare/datasets/__init__.py b/pymare/datasets/__init__.py
@@ -0,0 +1,6 @@
+"""Open meta-analytic datasets."""
+from .metadat import michael2013
+
+__all__ = [
+    "michael2013",
+]