From 5caf53751e23667043e771c37854f593fc705ac3 Mon Sep 17 00:00:00 2001 From: Zhen-Qi Liu Date: Fri, 10 May 2024 15:30:34 -0400 Subject: [PATCH 01/32] Initial reorg --- .gitignore | 1 + MANIFEST.in | 3 +- dev_environment.yml | 26 - docs/api.rst | 303 ++-- environment.yml | 15 - netneurotools/__init__.py | 13 +- netneurotools/civet.py | 104 -- netneurotools/datasets/__init__.py | 63 +- .../datasets/{mirchi.py => _mirchi2018.py} | 44 - .../{data/osf.json => datasets/datasets.json} | 2 +- netneurotools/datasets/datasets_utils.py | 185 ++ netneurotools/datasets/fetch_atlas.py | 413 +++++ netneurotools/datasets/fetch_project.py | 221 +++ netneurotools/datasets/fetch_template.py | 353 ++++ netneurotools/datasets/fetchers.py | 882 --------- .../{ => datasets}/tests/__init__.py | 0 .../datasets/tests/test_datasetsutils.py | 35 + .../tests/test_fetch.py} | 192 +- netneurotools/datasets/utils.py | 100 -- netneurotools/experimental/__init__.py | 4 + netneurotools/freesurfer.py | 662 ------- netneurotools/interface/__init__.py | 3 + netneurotools/interface/freesurfer.py | 1 + netneurotools/interface/tests/__init__.py | 0 .../interface/tests/test_freesurfer.py | 1 + netneurotools/metrics/__init__.py | 66 + netneurotools/{metrics.py => metrics/bct.py} | 622 +++---- netneurotools/metrics/communication.py | 1 + netneurotools/metrics/control.py | 1 + netneurotools/metrics/metrics_utils.py | 65 + netneurotools/metrics/spreading.py | 6 + netneurotools/metrics/statistical.py | 660 +++++++ netneurotools/metrics/tests/__init__.py | 0 .../tests/test_bct.py} | 9 +- .../metrics/tests/test_communication.py | 1 + netneurotools/metrics/tests/test_control.py | 1 + netneurotools/metrics/tests/test_spreading.py | 1 + .../metrics/tests/test_statistical.py | 1 + netneurotools/modularity.py | 316 ---- netneurotools/modularity/__init__.py | 25 + .../{cluster.py => modularity/modules.py} | 315 +++- netneurotools/modularity/tests/__init__.py | 0 .../tests/test_modules.py} | 61 +- netneurotools/networks/__init__.py | 33 + netneurotools/networks/consensus.py | 295 +++ netneurotools/networks/generative.py | 1 + netneurotools/networks/networks_utils.py | 127 ++ .../{networks.py => networks/randomize.py} | 528 +----- netneurotools/networks/tests/__init__.py | 0 .../networks/tests/test_consensus.py | 1 + .../networks/tests/test_generative.py | 1 + .../networks/tests/test_networks_utils.py | 11 + .../networks/tests/test_randomize.py | 1 + netneurotools/plotting/__init__.py | 34 + .../{colors.py => plotting/color_utils.py} | 3 +- netneurotools/plotting/mpl_plotters.py | 289 +++ .../pysurfer_plotters.py} | 343 +--- netneurotools/plotting/pyvista_plotters.py | 5 + netneurotools/plotting/tests/__init__.py | 0 .../plotting/tests/test_colorutils.py | 1 + netneurotools/plotting/tests/test_mpl.py | 36 + netneurotools/plotting/tests/test_pysurfer.py | 27 + netneurotools/plotting/tests/test_pyvista.py | 1 + netneurotools/spatial/__init__.py | 12 + .../spatial/gaussian_random_field.py | 1 + netneurotools/spatial/spatial_stats.py | 10 + netneurotools/spatial/tests/__init__.py | 0 netneurotools/spatial/tests/test_grf.py | 1 + .../spatial/tests/test_spatialstats.py | 1 + netneurotools/stats.py | 1593 ----------------- netneurotools/stats/__init__.py | 37 + .../generators.py => stats/correlation.py} | 101 +- netneurotools/stats/permutation_test.py | 282 +++ netneurotools/stats/regression.py | 257 +++ netneurotools/stats/stats_utils.py | 3 + netneurotools/stats/tests/__init__.py | 0 netneurotools/stats/tests/test_correlation.py | 66 + 
netneurotools/stats/tests/test_permutation.py | 65 + netneurotools/stats/tests/test_regression.py | 14 + netneurotools/surface.py | 189 -- netneurotools/tests/test_civet.py | 30 - netneurotools/tests/test_freesurfer.py | 82 - netneurotools/tests/test_modularity.py | 44 - netneurotools/tests/test_plotting.py | 58 - netneurotools/tests/test_stats.py | 172 -- netneurotools/tests/test_utils.py | 47 - netneurotools/utils.py | 243 --- pyproject.toml | 29 +- 88 files changed, 4825 insertions(+), 6026 deletions(-) delete mode 100644 dev_environment.yml delete mode 100644 environment.yml delete mode 100644 netneurotools/civet.py rename netneurotools/datasets/{mirchi.py => _mirchi2018.py} (71%) rename netneurotools/{data/osf.json => datasets/datasets.json} (99%) create mode 100644 netneurotools/datasets/datasets_utils.py create mode 100644 netneurotools/datasets/fetch_atlas.py create mode 100644 netneurotools/datasets/fetch_project.py create mode 100644 netneurotools/datasets/fetch_template.py delete mode 100644 netneurotools/datasets/fetchers.py rename netneurotools/{ => datasets}/tests/__init__.py (100%) create mode 100644 netneurotools/datasets/tests/test_datasetsutils.py rename netneurotools/{tests/test_datasets.py => datasets/tests/test_fetch.py} (66%) delete mode 100644 netneurotools/datasets/utils.py create mode 100644 netneurotools/experimental/__init__.py delete mode 100644 netneurotools/freesurfer.py create mode 100644 netneurotools/interface/__init__.py create mode 100644 netneurotools/interface/freesurfer.py create mode 100644 netneurotools/interface/tests/__init__.py create mode 100644 netneurotools/interface/tests/test_freesurfer.py create mode 100644 netneurotools/metrics/__init__.py rename netneurotools/{metrics.py => metrics/bct.py} (95%) create mode 100644 netneurotools/metrics/communication.py create mode 100644 netneurotools/metrics/control.py create mode 100644 netneurotools/metrics/metrics_utils.py create mode 100644 netneurotools/metrics/spreading.py create mode 100644 netneurotools/metrics/statistical.py create mode 100644 netneurotools/metrics/tests/__init__.py rename netneurotools/{tests/test_metrics.py => metrics/tests/test_bct.py} (72%) create mode 100644 netneurotools/metrics/tests/test_communication.py create mode 100644 netneurotools/metrics/tests/test_control.py create mode 100644 netneurotools/metrics/tests/test_spreading.py create mode 100644 netneurotools/metrics/tests/test_statistical.py delete mode 100644 netneurotools/modularity.py create mode 100644 netneurotools/modularity/__init__.py rename netneurotools/{cluster.py => modularity/modules.py} (58%) create mode 100644 netneurotools/modularity/tests/__init__.py rename netneurotools/{tests/test_cluster.py => modularity/tests/test_modules.py} (58%) create mode 100644 netneurotools/networks/__init__.py create mode 100644 netneurotools/networks/consensus.py create mode 100644 netneurotools/networks/generative.py create mode 100644 netneurotools/networks/networks_utils.py rename netneurotools/{networks.py => networks/randomize.py} (65%) create mode 100644 netneurotools/networks/tests/__init__.py create mode 100644 netneurotools/networks/tests/test_consensus.py create mode 100644 netneurotools/networks/tests/test_generative.py create mode 100644 netneurotools/networks/tests/test_networks_utils.py create mode 100644 netneurotools/networks/tests/test_randomize.py create mode 100644 netneurotools/plotting/__init__.py rename netneurotools/{colors.py => plotting/color_utils.py} (98%) create mode 100644 
netneurotools/plotting/mpl_plotters.py rename netneurotools/{plotting.py => plotting/pysurfer_plotters.py} (61%) create mode 100644 netneurotools/plotting/pyvista_plotters.py create mode 100644 netneurotools/plotting/tests/__init__.py create mode 100644 netneurotools/plotting/tests/test_colorutils.py create mode 100644 netneurotools/plotting/tests/test_mpl.py create mode 100644 netneurotools/plotting/tests/test_pysurfer.py create mode 100644 netneurotools/plotting/tests/test_pyvista.py create mode 100644 netneurotools/spatial/__init__.py create mode 100644 netneurotools/spatial/gaussian_random_field.py create mode 100644 netneurotools/spatial/spatial_stats.py create mode 100644 netneurotools/spatial/tests/__init__.py create mode 100644 netneurotools/spatial/tests/test_grf.py create mode 100644 netneurotools/spatial/tests/test_spatialstats.py delete mode 100644 netneurotools/stats.py create mode 100644 netneurotools/stats/__init__.py rename netneurotools/{datasets/generators.py => stats/correlation.py} (50%) create mode 100644 netneurotools/stats/permutation_test.py create mode 100644 netneurotools/stats/regression.py create mode 100644 netneurotools/stats/stats_utils.py create mode 100644 netneurotools/stats/tests/__init__.py create mode 100644 netneurotools/stats/tests/test_correlation.py create mode 100644 netneurotools/stats/tests/test_permutation.py create mode 100644 netneurotools/stats/tests/test_regression.py delete mode 100644 netneurotools/surface.py delete mode 100644 netneurotools/tests/test_civet.py delete mode 100644 netneurotools/tests/test_freesurfer.py delete mode 100644 netneurotools/tests/test_modularity.py delete mode 100644 netneurotools/tests/test_plotting.py delete mode 100644 netneurotools/tests/test_stats.py delete mode 100644 netneurotools/tests/test_utils.py delete mode 100644 netneurotools/utils.py diff --git a/.gitignore b/.gitignore index 3bfe751..2a0ee24 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ docs/generated/ docs/auto_examples/ docs/modules/ +docs/sg_execution_times.rst # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/MANIFEST.in b/MANIFEST.in index 09d993a..6467bdd 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,2 @@ include README.rst LICENSE environment.yml requirements.txt -recursive-include netneurotools/data * -include versioneer.py +include versioneer.py \ No newline at end of file diff --git a/dev_environment.yml b/dev_environment.yml deleted file mode 100644 index dd89823..0000000 --- a/dev_environment.yml +++ /dev/null @@ -1,26 +0,0 @@ -name: netneurotools -channels: - - defaults - - conda-forge -dependencies: - - python>=3.6 - - flake8 - - matplotlib - - mayavi - - nibabel - - nilearn - - numba - - "numpy>=1.16" - - pandas - - pip - - "pytest>=3.6" - - pytest-cov - - scikit-learn - - "scipy>=1.4.0" - - "sphinx>=1.2" - - sphinx-gallery - - sphinx_rtd_theme - - versioneer - - pip: - - git+https://github.com/aestrivex/bctpy.git#egg=bctpy - - pysurfer diff --git a/docs/api.rst b/docs/api.rst index a5adc5c..601c92d 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -8,71 +8,102 @@ Python Reference API .. contents:: **List of modules** :local: -.. _ref_network: +.. _ref_datasets: -:mod:`netneurotools.networks` - Constructing networks ------------------------------------------------------ +:mod:`netneurotools.datasets` - Automatic dataset fetching +---------------------------------------------------------- -.. automodule:: netneurotools.networks +.. 
automodule:: netneurotools.datasets :no-members: :no-inherited-members: -.. currentmodule:: netneurotools.networks +.. currentmodule:: netneurotools.datasets + +To download templates .. autosummary:: :template: function.rst :toctree: generated/ - func_consensus - struct_consensus - threshold_network - binarize_network - match_length_degree_distribution - randmio_und - strength_preserving_rand_sa - strength_preserving_rand_sa_mse_opt - strength_preserving_rand_sa_dir -.. _ref_modularity: + fetch_fsaverage + fetch_hcp_standards + fetch_civet + fetch_conte69 + fetch_yerkes19 -:mod:`netneurotools.modularity` - Calculating network modularity ----------------------------------------------------------------- +To download atlases -.. automodule:: netneurotools.modularity - :no-members: - :no-inherited-members: +.. autosummary:: + :template: function.rst + :toctree: generated/ -.. currentmodule:: netneurotools.modularity + fetch_cammoun2012 + fetch_schaefer2018 + fetch_mmpall + fetch_pauli2018 + fetch_ye2020 + fetch_voneconomo + +To download project-related data .. autosummary:: :template: function.rst :toctree: generated/ - consensus_modularity - zrand - get_modularity - get_modularity_z - get_modularity_sig + fetch_vazquez_rodriguez2019 + fetch_mirchi2018 + fetch_hansen_manynetworks + fetch_hansen_receptors + fetch_hansen_genecognition + fetch_hansen_brainstem + fetch_shafiei_hcpmeg + fetch_suarez_mami + fetch_famous_gmat + fetch_neurosynth + -.. _ref_cluster: +.. _ref_network: -:mod:`netneurotools.cluster` - Working with clusters ----------------------------------------------------- +:mod:`netneurotools.networks` - Constructing networks +----------------------------------------------------- -.. automodule:: netneurotools.cluster +.. automodule:: netneurotools.networks :no-members: :no-inherited-members: -.. currentmodule:: netneurotools.cluster +.. currentmodule:: netneurotools.networks + +To construct consensus networks .. autosummary:: :template: function.rst :toctree: generated/ - find_consensus - match_assignments - reorder_assignments - match_cluster_labels + func_consensus + struct_consensus + +To randomize networks + +.. autosummary:: + :template: function.rst + :toctree: generated/ + + randmio_und + match_length_degree_distribution + strength_preserving_rand_sa + strength_preserving_rand_sa_mse_opt + strength_preserving_rand_sa_dir + +Convenient functions + +.. autosummary:: + :template: function.rst + :toctree: generated/ + + binarize_network + threshold_network + .. _ref_plotting: @@ -85,50 +116,42 @@ Python Reference API .. currentmodule:: netneurotools.plotting +PySurfer + .. autosummary:: :template: function.rst :toctree: generated/ - sort_communities - plot_mod_heatmap plot_conte69 plot_fslr plot_fsaverage plot_fsvertex - plot_point_brain -.. _ref_stats: +Pyvista -:mod:`netneurotools.stats` - General statistics functions ---------------------------------------------------------- +.. autosummary:: + :template: function.rst + :toctree: generated/ -.. automodule:: netneurotools.stats - :no-members: - :no-inherited-members: + pv_plot_surface -.. currentmodule:: netneurotools.stats +matplotlib .. 
autosummary:: :template: function.rst :toctree: generated/ - gen_spinsamples - residualize - get_mad_outliers - efficient_pearsonr - permtest_1samp - permtest_rel - permtest_pearsonr - get_dominance_stats - network_pearsonr - network_pearsonr_numba - network_pearsonr_pairwise - effective_resistance - network_polarisation - network_variance - network_variance_numba - network_covariance - network_covariance_numba + plot_point_brain + plot_mod_heatmap + +Fun color & colormap stuff + +.. autosummary:: + :template: function.rst + :toctree: generated/ + + available_cmaps + .. _ref_metrics: @@ -141,146 +164,172 @@ Python Reference API .. currentmodule:: netneurotools.metrics +Brain network metrics + .. autosummary:: :template: function.rst :toctree: generated/ - _binarize degrees_und degrees_dir distance_wei_floyd retrieve_shortest_path - communicability_bin - communicability_wei - rich_feeder_peripheral navigation_wu get_navigation_path_length - search_information + communicability_bin + communicability_wei path_transitivity - flow_graph + search_information mean_first_passage_time diffusion_efficiency resource_efficiency_bin + flow_graph + assortativity matching_ind_und - _graph_laplacian - -.. _ref_datasets: + rich_feeder_peripheral -:mod:`netneurotools.datasets` - Automatic dataset fetching ----------------------------------------------------------- +Network spreading -.. automodule:: netneurotools.datasets - :no-members: - :no-inherited-members: +.. autosummary:: + :template: function.rst + :toctree: generated/ -.. currentmodule:: netneurotools.datasets + simulate_atrophy -Functions to download atlases and templates +Statistical network metrics .. autosummary:: :template: function.rst :toctree: generated/ - fetch_cammoun2012 - fetch_civet - fetch_conte69 - fetch_fsaverage - fetch_pauli2018 - fetch_schaefer2018 - fetch_hcp_standards - fetch_voneconomo + network_pearsonr + network_pearsonr_numba + network_pearsonr_pairwise + effective_resistance + network_polarisation + network_variance + network_variance_numba + network_covariance + network_covariance_numba + -Functions to download real-world datasets +.. _ref_modularity: + +:mod:`netneurotools.modularity` - Calculating network modularity +---------------------------------------------------------------- + +.. automodule:: netneurotools.modularity + :no-members: + :no-inherited-members: + +.. currentmodule:: netneurotools.modularity .. autosummary:: :template: function.rst :toctree: generated/ - fetch_connectome - fetch_mirchi2018 - fetch_vazquez_rodriguez2019 + match_cluster_labels + match_assignments + reorder_assignments + find_consensus + consensus_modularity + zrand + get_modularity + get_modularity_z + get_modularity_sig + + +.. _ref_stats: -Functions to generate (pseudo-random) datasets +:mod:`netneurotools.stats` - General statistics functions +--------------------------------------------------------- + +.. automodule:: netneurotools.stats + :no-members: + :no-inherited-members: + +.. currentmodule:: netneurotools.stats + +Correlations .. autosummary:: :template: function.rst :toctree: generated/ - make_correlated_xy + efficient_pearsonr + weighted_pearsonr + make_correlated_xy -.. _ref_freesurfer: +Permutation tests -:mod:`netneurotools.freesurfer` - FreeSurfer compatibility functions --------------------------------------------------------------------- +.. autosummary:: + :template: function.rst + :toctree: generated/ -.. 
automodule:: netneurotools.freesurfer - :no-members: - :no-inherited-members: + permtest_1samp + permtest_rel + permtest_pearsonr -.. currentmodule:: netneurotools.freesurfer +Regressions .. autosummary:: :template: function.rst :toctree: generated/ - apply_prob_atlas - find_parcel_centroids - parcels_to_vertices - vertices_to_parcels - spin_data - spin_parcels + residualize + get_dominance_stats + -.. _ref_civet: +.. _ref_spatial: -:mod:`netneurotools.civet` - CIVET compatibility functions ----------------------------------------------------------- +:mod:`netneurotools.spatial` - Spatial statistics +------------------------------------------------- -.. automodule:: netneurotools.civet +.. automodule:: netneurotools.spatial :no-members: :no-inherited-members: -.. currentmodule:: netneurotools.civet +.. currentmodule:: netneurotools.spatial + +Calculating spatial statistics .. autosummary:: :template: function.rst :toctree: generated/ - read_civet - civet_to_freesurfer + morans_i + local_morans_i -.. _ref_utils: -:mod:`netneurotools.utils` - Miscellaneous, grab bag utilities --------------------------------------------------------------- +.. _ref_interface: -.. automodule:: netneurotools.utils +:mod:`netneurotools.interface` - Interface for external tools +------------------------------------------------------------- + +.. automodule:: netneurotools.interface :no-members: :no-inherited-members: -.. currentmodule:: netneurotools.utils +.. currentmodule:: netneurotools.interface .. autosummary:: :template: function.rst :toctree: generated/ - run - add_constant - get_triu - get_centroids -.. _ref_colors: +.. _ref_experimental: -:mod:`netneurotools.colors` - Useful colormaps --------------------------------------------------------------- +:mod:`netneurotools.experimental` - Functions in alpha stage +------------------------------------------------------------ -.. automodule:: netneurotools.colors +.. automodule:: netneurotools.experimental :no-members: :no-inherited-members: -.. currentmodule:: netneurotools.colors +.. currentmodule:: netneurotools.experimental .. autosummary:: :template: function.rst :toctree: generated/ - available_cmaps + diff --git a/environment.yml b/environment.yml deleted file mode 100644 index 5984c5b..0000000 --- a/environment.yml +++ /dev/null @@ -1,15 +0,0 @@ -name: netneurotools -channels: - - defaults - - conda-forge -dependencies: - - python>=3.6 - - matplotlib - - nibabel - - nilearn - - "numpy>=1.16" - - pip - - scikit-learn - - "scipy>=1.4.0" - - pip: - - git+https://github.com/aestrivex/bctpy.git#egg=bctpy diff --git a/netneurotools/__init__.py b/netneurotools/__init__.py index 8163cc1..2ca326a 100644 --- a/netneurotools/__init__.py +++ b/netneurotools/__init__.py @@ -1,6 +1,7 @@ -__all__ = [ - '__version__', -] - -from . import _version -__version__ = _version.get_versions()['version'] + +from . 
import _version +__version__ = _version.get_versions()['version'] + +__all__ = [ + '__version__' +] diff --git a/netneurotools/civet.py b/netneurotools/civet.py deleted file mode 100644 index 5c247c0..0000000 --- a/netneurotools/civet.py +++ /dev/null @@ -1,104 +0,0 @@ -# -*- coding: utf-8 -*- -"""Functions for working with CIVET data (ugh).""" - -import nibabel as nib -import numpy as np -from scipy.interpolate import griddata - -from .datasets import fetch_civet, fetch_fsaverage - -_MNI305to152 = np.array([[0.9975, -0.0073, 0.0176, -0.0429], - [0.0146, 1.0009, -0.0024, 1.5496], - [-0.0130, -0.0093, 0.9971, 1.1840], - [0.0000, 0.0000, 0.0000, 1.0000]]) - - -def read_civet(fname): - """ - Read a CIVET-style .obj geometry file. - - Parameters - ---------- - fname : str or os.PathLike - Filepath to .obj file - - Returns - ------- - vertices : (N, 3) - triangles : (T, 3) - """ - k, polygons = 0, [] - with open(fname, 'r') as src: - n_vert = int(src.readline().split()[6]) - vertices = np.zeros((n_vert, 3)) - for i, line in enumerate(src): - if i < n_vert: - vertices[i] = [float(i) for i in line.split()] - elif i >= (2 * n_vert) + 5: - if not line.strip(): - k = 1 - elif k == 1: - polygons.extend([int(i) for i in line.split()]) - - triangles = np.reshape(np.asarray(polygons), (-1, 3)) - - return vertices, triangles - - -def civet_to_freesurfer(brainmap, surface='mid', version='v1', - freesurfer='fsaverage6', method='nearest', - data_dir=None): - """ - Project `brainmap` in CIVET space to `freesurfer` fsaverage space. - - Uses a nearest-neighbor projection based on the geometry of the vertices - - Parameters - ---------- - brainmap : array_like - CIVET brainmap to be converted to freesurfer space - surface : {'white', 'mid'}, optional - Which CIVET surface to use for geometry of `brainmap`. Default: 'mid' - version : {'v1', 'v2'}, optional - Which CIVET version to use for geometry of `brainmap`. Default: 'v1' - freesurfer : str, optional - Which version of FreeSurfer space to project data to. Must be one of - {'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', 'fsaverage6'}. - Default: 'fsaverage6' - method : {'nearest', 'linear'}, optional - What method of interpolation to use when projecting the data between - surfaces. Default: 'nearest' - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - - Returns - ------- - data : np.ndarray - Provided `brainmap` mapped to FreeSurfer - """ - brainmap = np.asarray(brainmap) - densities = (81924, 327684) - n_vert = brainmap.shape[0] - if n_vert not in densities: - raise ValueError('Unable to interpret `brainmap` space; provided ' - 'array must have length in {}. 
Received: {}' - .format(densities, n_vert)) - - n_vert = n_vert // 2 - icbm = fetch_civet(density='41k' if n_vert == 40962 else '164k', - version=version, data_dir=data_dir, verbose=0)[surface] - fsavg = fetch_fsaverage(version=freesurfer, data_dir=data_dir, verbose=0) - fsavg = fsavg['pial' if surface == 'mid' else surface] - - data = [] - for n, hemi in enumerate(('lh', 'rh')): - sl = slice(n_vert * n, n_vert * (n + 1)) - vert_cv, _ = read_civet(getattr(icbm, hemi)) - vert_fs = nib.affines.apply_affine( - _MNI305to152, nib.freesurfer.read_geometry(getattr(fsavg, hemi))[0] - ) - data.append(griddata(vert_cv, brainmap[sl], vert_fs, method=method)) - - return np.hstack(data) diff --git a/netneurotools/datasets/__init__.py b/netneurotools/datasets/__init__.py index cfe50c1..0cd400e 100644 --- a/netneurotools/datasets/__init__.py +++ b/netneurotools/datasets/__init__.py @@ -1,16 +1,53 @@ -"""Functions for fetching and generating datasets.""" +"""Functions for handling datasets.""" + + +from .fetch_template import ( + fetch_fsaverage, fetch_hcp_standards, fetch_civet, + fetch_conte69, fetch_yerkes19 +) + + +from .fetch_atlas import ( + # cortical + fetch_cammoun2012, fetch_schaefer2018, fetch_mmpall, + # subcortical + fetch_pauli2018, fetch_ye2020, + # annotation + fetch_voneconomo +) + + +from .fetch_project import ( + # old projects + fetch_vazquez_rodriguez2019, fetch_mirchi2018, + # new projects + fetch_hansen_manynetworks, fetch_hansen_receptors, fetch_hansen_genecognition, + fetch_hansen_brainstem, fetch_shafiei_hcpmeg, fetch_suarez_mami, + # example data + fetch_famous_gmat, + # resources + fetch_neurosynth +) + +from .datasets_utils import ( + FREESURFER_IGNORE, _get_freesurfer_subjid +) + __all__ = [ - 'fetch_cammoun2012', 'fetch_pauli2018', 'fetch_fsaverage', 'fetch_conte69', - 'fetch_connectome', 'available_connectomes', 'fetch_vazquez_rodriguez2019', - 'fetch_mirchi2018', 'make_correlated_xy', 'fetch_schaefer2018', - 'fetch_hcp_standards', 'fetch_voneconomo', 'fetch_mmpall', 'fetch_civet' + # fetch_template + 'fetch_fsaverage', 'fetch_hcp_standards', 'fetch_civet', + 'fetch_conte69', 'fetch_yerkes19', + # fetch_atlas + 'fetch_cammoun2012', 'fetch_schaefer2018', 'fetch_mmpall', + 'fetch_pauli2018', 'fetch_ye2020', + 'fetch_voneconomo', + # fetch_project + 'fetch_vazquez_rodriguez2019', 'fetch_mirchi2018', + 'fetch_hansen_manynetworks', 'fetch_hansen_receptors', 'fetch_hansen_genecognition', + 'fetch_hansen_brainstem', 'fetch_shafiei_hcpmeg', 'fetch_suarez_mami', + 'fetch_famous_gmat', + 'fetch_neurosynth', + # datasets_utils + 'FREESURFER_IGNORE', '_get_freesurfer_subjid' ] - -from .fetchers import (fetch_cammoun2012, fetch_pauli2018, fetch_fsaverage, - fetch_conte69, fetch_yerkes19, fetch_connectome, - available_connectomes, fetch_vazquez_rodriguez2019, - fetch_schaefer2018, fetch_hcp_standards, - fetch_voneconomo, fetch_mmpall, fetch_civet) -from .generators import (make_correlated_xy) -from .mirchi import (fetch_mirchi2018) diff --git a/netneurotools/datasets/mirchi.py b/netneurotools/datasets/_mirchi2018.py similarity index 71% rename from netneurotools/datasets/mirchi.py rename to netneurotools/datasets/_mirchi2018.py index 190ae63..3ef7e75 100644 --- a/netneurotools/datasets/mirchi.py +++ b/netneurotools/datasets/_mirchi2018.py @@ -1,12 +1,9 @@ -# -*- coding: utf-8 -*- """Code for re-generating results from Mirchi et al., 2018 (SCAN).""" -import os from urllib.request import HTTPError, urlopen import numpy as np -from .utils import _get_data_dir TIMESERIES = 
("https://s3.amazonaws.com/openneuro/ds000031/ds000031_R1.0.2" @@ -134,45 +131,4 @@ def _get_panas(data_dir=None, resume=True, verbose=1): return measures -def fetch_mirchi2018(data_dir=None, resume=True, verbose=1): - """ - Download (and creates) dataset for replicating Mirchi et al., 2018, SCAN. - - Parameters - ---------- - data_dir : str, optional - Directory to check for existing data files (if they exist) or to save - generated data files. Files should be named mirchi2018_fc.npy and - mirchi2018_panas.csv for the functional connectivity and behavioral - data, respectively. - - Returns - ------- - X : (73, 198135) numpy.ndarray - Functional connections from MyConnectome rsfMRI time series data - Y : (73, 13) numpy.ndarray - PANAS subscales from MyConnectome behavioral data - """ - data_dir = os.path.join(_get_data_dir(data_dir=data_dir), 'ds-mirchi2018') - os.makedirs(data_dir, exist_ok=True) - - X_fname = os.path.join(data_dir, 'myconnectome_fc.npy') - Y_fname = os.path.join(data_dir, 'myconnectome_panas.csv') - - if not os.path.exists(X_fname): - X = _get_fc(data_dir=data_dir, resume=resume, verbose=verbose) - np.save(X_fname, X, allow_pickle=False) - else: - X = np.load(X_fname, allow_pickle=False) - - if not os.path.exists(Y_fname): - Y = _get_panas(data_dir=data_dir, resume=resume, verbose=verbose) - np.savetxt(Y_fname, np.column_stack(list(Y.values())), - header=','.join(Y.keys()), delimiter=',', fmt='%i') - # convert dictionary to structured array before returning - Y = np.array([tuple(row) for row in np.column_stack(list(Y.values()))], - dtype=dict(names=list(Y.keys()), formats=['i8'] * len(Y))) - else: - Y = np.genfromtxt(Y_fname, delimiter=',', names=True, dtype=int) - return X, Y diff --git a/netneurotools/data/osf.json b/netneurotools/datasets/datasets.json similarity index 99% rename from netneurotools/data/osf.json rename to netneurotools/datasets/datasets.json index 289ecbc..80bb3e3 100644 --- a/netneurotools/data/osf.json +++ b/netneurotools/datasets/datasets.json @@ -147,7 +147,7 @@ } } }, - "ds-connectomes": { + "ds-famous-gmat": { "celegans": { "url": [ "mb37e", diff --git a/netneurotools/datasets/datasets_utils.py b/netneurotools/datasets/datasets_utils.py new file mode 100644 index 0000000..f4bae92 --- /dev/null +++ b/netneurotools/datasets/datasets_utils.py @@ -0,0 +1,185 @@ +# -*- coding: utf-8 -*- +"""Utilites for loading / creating datasets.""" + +import json +import os +from collections import namedtuple +import importlib.resources + +if getattr(importlib.resources, 'files', None) is not None: + _importlib_avail = True +else: + from pkg_resources import resource_filename + _importlib_avail = False + + +SURFACE = namedtuple('Surface', ('lh', 'rh')) + +FREESURFER_IGNORE = [ + 'unknown', 'corpuscallosum', 'Background+FreeSurfer_Defined_Medial_Wall' +] + +def _osfify_urls(data): + """ + Format `data` object with OSF API URL. 
+ + Parameters + ---------- + data : object + If dict with a `url` key, will format OSF_API with relevant values + + Returns + ------- + data : object + Input data with all `url` dict keys formatted + """ + OSF_API = "https://files.osf.io/v1/resources/{}/providers/osfstorage/{}" + + if isinstance(data, str): + return data + elif 'url' in data: + data['url'] = OSF_API.format(*data['url']) + + try: + for key, value in data.items(): + data[key] = _osfify_urls(value) + except AttributeError: + for n, value in enumerate(data): + data[n] = _osfify_urls(value) + + return data + + +if _importlib_avail: + osf = importlib.resources.files("netneurotools") / "datasets/datasets.json" +else: + osf = resource_filename('netneurotools', 'datasets/datasets.json') + +with open(osf) as src: + OSF_RESOURCES = _osfify_urls(json.load(src)) + + +def _get_dataset_info(name): + """ + Return url and MD5 checksum for dataset `name`. + + Parameters + ---------- + name : str + Name of dataset + + Returns + ------- + url : str + URL from which to download dataset + md5 : str + MD5 checksum for file downloade from `url` + """ + try: + return OSF_RESOURCES[name] + except KeyError: + raise KeyError("Provided dataset '{}' is not valid. Must be one of: {}" + .format(name, sorted(OSF_RESOURCES.keys()))) from None + + +def _get_data_dir(data_dir=None): + """ + Get path to netneurotools data directory. + + Parameters + ---------- + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + + Returns + ------- + data_dir : str + Path to use as data directory + """ + if data_dir is None: + data_dir = os.environ.get('NNT_DATA', os.path.join('~', 'nnt-data')) + data_dir = os.path.expanduser(data_dir) + if not os.path.exists(data_dir): + os.makedirs(data_dir) + + return data_dir + + +def _check_freesurfer_subjid(subject_id, subjects_dir=None): + """ + Check that `subject_id` exists in provided FreeSurfer `subjects_dir`. + + Parameters + ---------- + subject_id : str + FreeSurfer subject ID + subjects_dir : str, optional + Path to FreeSurfer subject directory. If not set, will inherit from + the environmental variable $SUBJECTS_DIR. Default: None + + Returns + ------- + subject_id : str + FreeSurfer subject ID, as provided + subjects_dir : str + Full filepath to `subjects_dir` + + Raises + ------ + FileNotFoundError + """ + # check inputs for subjects_dir and subject_id + if subjects_dir is None or not os.path.isdir(subjects_dir): + try: + subjects_dir = os.environ['SUBJECTS_DIR'] + except KeyError: + subjects_dir = os.getcwd() + else: + subjects_dir = os.path.abspath(subjects_dir) + + subjdir = os.path.join(subjects_dir, subject_id) + if not os.path.isdir(subjdir): + raise FileNotFoundError('Cannot find specified subject id {} in ' + 'provided subject directory {}.' + .format(subject_id, subjects_dir)) + + return subject_id, subjects_dir + + +def _get_freesurfer_subjid(subject_id, subjects_dir=None): + """ + Get fsaverage version `subject_id`, fetching if required. + + Parameters + ---------- + subject_id : str + FreeSurfer subject ID + subjects_dir : str, optional + Path to FreeSurfer subject directory. If not set, will inherit from + the environmental variable $SUBJECTS_DIR. 
Default: None + + Returns + ------- + subject_id : str + FreeSurfer subject ID + subjects_dir : str + Path to subject directory with `subject_id` + """ + + # check for FreeSurfer install w/fsaverage; otherwise, fetch required + try: + subject_id, subjects_dir = _check_freesurfer_subjid(subject_id, subjects_dir) + except FileNotFoundError: + if 'fsaverage' not in subject_id: + raise ValueError('Provided subject {} does not exist in provided ' + 'subjects_dir {}' + .format(subject_id, subjects_dir)) from None + from ..datasets import fetch_fsaverage + from ..datasets import _get_data_dir + fetch_fsaverage(subject_id) + subjects_dir = os.path.join(_get_data_dir(), 'tpl-fsaverage') + subject_id, subjects_dir = _check_freesurfer_subjid(subject_id, subjects_dir) + + return subject_id, subjects_dir \ No newline at end of file diff --git a/netneurotools/datasets/fetch_atlas.py b/netneurotools/datasets/fetch_atlas.py new file mode 100644 index 0000000..bc19e42 --- /dev/null +++ b/netneurotools/datasets/fetch_atlas.py @@ -0,0 +1,413 @@ +"""Functions for fetching atlas data.""" +import itertools +import os.path as op +import warnings + +try: + # nilearn 0.10.3 + from nilearn.datasets._utils import fetch_files as _fetch_files +except ImportError: + from nilearn.datasets.utils import _fetch_files + +from sklearn.utils import Bunch + +from .datasets_utils import ( + SURFACE, + _get_data_dir, _get_dataset_info +) + +def fetch_cammoun2012(version='MNI152NLin2009aSym', data_dir=None, url=None, + resume=True, verbose=1): + """ + Download files for Cammoun et al., 2012 multiscale parcellation. + + Parameters + ---------- + version : str, optional + Specifies which version of the dataset to download, where + 'MNI152NLin2009aSym' will return .nii.gz atlas files defined in MNI152 + space, 'fsaverageX' will return .annot files defined in fsaverageX + space (FreeSurfer 6.0.1), 'fslr32k' will return .label.gii files in + fs_LR_32k HCP space, and 'gcs' will return FreeSurfer-style .gcs + probabilistic atlas files for generating new, subject-specific + parcellations. Default: 'MNI152NLin2009aSym' + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + url : str, optional + URL from which to download data. Default: None + resume : bool, optional + Whether to attempt to resume partial download, if possible. Default: + True + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Dictionary-like object with keys ['scale033', 'scale060', 'scale125', + 'scale250', 'scale500'], where corresponding values are lists of + filepaths to downloaded parcellation files. + + References + ---------- + Cammoun, L., Gigandet, X., Meskaldji, D., Thiran, J. P., Sporns, O., Do, K. + Q., Maeder, P., and Meuli, R., & Hagmann, P. (2012). Mapping the human + connectome at multiple scales with diffusion spectrum MRI. Journal of + Neuroscience Methods, 203(2), 386-397. + + Notes + ----- + License: https://raw.githubusercontent.com/LTS5/cmp/master/COPYRIGHT + """ + if version == 'surface': + warnings.warn('Providing `version="surface"` is deprecated and will ' + 'be removed in a future release. 
For consistent ' + 'behavior please use `version="fsaverage"` instead.', + DeprecationWarning, stacklevel=2) + version = 'fsaverage' + elif version == 'volume': + warnings.warn('Providing `version="volume"` is deprecated and will ' + 'be removed in a future release. For consistent ' + 'behavior please use `version="MNI152NLin2009aSym"` ' + 'instead.', + DeprecationWarning, stacklevel=2) + version = 'MNI152NLin2009aSym' + + versions = [ + 'gcs', 'fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k', + 'MNI152NLin2009aSym' + ] + if version not in versions: + raise ValueError('The version of Cammoun et al., 2012 parcellation ' + 'requested "{}" does not exist. Must be one of {}' + .format(version, versions)) + + dataset_name = 'atl-cammoun2012' + keys = ['scale033', 'scale060', 'scale125', 'scale250', 'scale500'] + + data_dir = _get_data_dir(data_dir=data_dir) + info = _get_dataset_info(dataset_name)[version] + if url is None: + url = info['url'] + + opts = { + 'uncompress': True, + 'md5sum': info['md5'], + 'move': '{}.tar.gz'.format(dataset_name) + } + + # filenames differ based on selected version of dataset + if version == 'MNI152NLin2009aSym': + filenames = [ + 'atl-Cammoun2012_space-MNI152NLin2009aSym_res-{}_deterministic{}' + .format(res[-3:], suff) for res in keys for suff in ['.nii.gz'] + ] + ['atl-Cammoun2012_space-MNI152NLin2009aSym_info.csv'] + elif version == 'fslr32k': + filenames = [ + 'atl-Cammoun2012_space-fslr32k_res-{}_hemi-{}_deterministic{}' + .format(res[-3:], hemi, suff) for res in keys + for hemi in ['L', 'R'] for suff in ['.label.gii'] + ] + elif version in ('fsaverage', 'fsaverage5', 'fsaverage6'): + filenames = [ + 'atl-Cammoun2012_space-{}_res-{}_hemi-{}_deterministic{}' + .format(version, res[-3:], hemi, suff) for res in keys + for hemi in ['L', 'R'] for suff in ['.annot'] + ] + else: + filenames = [ + 'atl-Cammoun2012_res-{}_hemi-{}_probabilistic{}' + .format(res[5:], hemi, suff) + for res in keys[:-1] + ['scale500v1', 'scale500v2', 'scale500v3'] + for hemi in ['L', 'R'] for suff in ['.gcs', '.ctab'] + ] + + files = [ + (op.join(dataset_name, version, f), url, opts) for f in filenames + ] + data = _fetch_files(data_dir, files=files, resume=resume, verbose=verbose) + + if version == 'MNI152NLin2009aSym': + keys += ['info'] + elif version in ('fslr32k', 'fsaverage', 'fsaverage5', 'fsaverage6'): + data = [SURFACE(*data[i:i + 2]) for i in range(0, len(data), 2)] + else: + data = [data[::2][i:i + 2] for i in range(0, len(data) // 2, 2)] + # deal with the fact that last scale is split into three files :sigh: + data = data[:-3] + [list(itertools.chain.from_iterable(data[-3:]))] + + return Bunch(**dict(zip(keys, data))) + + + +def fetch_schaefer2018(version='fsaverage', data_dir=None, url=None, + resume=True, verbose=1): + """ + Download FreeSurfer .annot files for Schaefer et al., 2018 parcellation. + + Parameters + ---------- + version : {'fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k'} + Specifies which surface annotation files should be matched to. Default: + 'fsaverage' + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + url : str, optional + URL from which to download data. Default: None + resume : bool, optional + Whether to attempt to resume partial download, if possible. Default: + True + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. 
+ Default: 1 + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Dictionary-like object with keys of format '{}Parcels{}Networks' where + corresponding values are the left/right hemisphere annotation files + + References + ---------- + Schaefer, A., Kong, R., Gordon, E. M., Laumann, T. O., Zuo, X. N., Holmes, + A. J., ... & Yeo, B. T. (2017). Local-global parcellation of the human + cerebral cortex from intrinsic functional connectivity MRI. Cerebral + Cortex, 28(9), 3095-3114. + + Notes + ----- + License: https://github.com/ThomasYeoLab/CBIG/blob/master/LICENSE.md + """ + versions = ['fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k'] + if version not in versions: + raise ValueError('The version of Schaefer et al., 2018 parcellation ' + 'requested "{}" does not exist. Must be one of {}' + .format(version, versions)) + + dataset_name = 'atl-schaefer2018' + keys = [ + '{}Parcels{}Networks'.format(p, n) + for p in range(100, 1001, 100) for n in [7, 17] + ] + + data_dir = _get_data_dir(data_dir=data_dir) + info = _get_dataset_info(dataset_name)[version] + if url is None: + url = info['url'] + + opts = { + 'uncompress': True, + 'md5sum': info['md5'], + 'move': '{}.tar.gz'.format(dataset_name) + } + + if version == 'fslr32k': + hemispheres, suffix = ['LR'], 'dlabel.nii' + else: + hemispheres, suffix = ['L', 'R'], 'annot' + filenames = [ + 'atl-Schaefer2018_space-{}_hemi-{}_desc-{}_deterministic.{}' + .format(version, hemi, desc, suffix) + for desc in keys for hemi in hemispheres + ] + + files = [(op.join(dataset_name, version, f), url, opts) + for f in filenames] + data = _fetch_files(data_dir, files=files, resume=resume, verbose=verbose) + + if suffix == 'annot': + data = [SURFACE(*data[i:i + 2]) for i in range(0, len(keys) * 2, 2)] + + return Bunch(**dict(zip(keys, data))) + + + +def fetch_mmpall(version='fslr32k', data_dir=None, url=None, resume=True, + verbose=1): + """ + Download .label.gii files for Glasser et al., 2016 MMPAll atlas. + + Parameters + ---------- + version : {'fslr32k'} + Specifies which surface annotation files should be matched to. Default: + 'fslr32k' + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + url : str, optional + URL from which to download data. Default: None + resume : bool, optional + Whether to attempt to resume partial download, if possible. Default: + True + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Namedtuple with fields ('lh', 'rh') corresponding to filepaths to + left/right hemisphere parcellation files + + References + ---------- + Glasser, M. F., Coalson, T. S., Robinson, E. C., Hacker, C. D., Harwell, + J., Yacoub, E., ... & Van Essen, D. C. (2016). A multi-modal parcellation + of human cerebral cortex. Nature, 536(7615), 171-178. + + Notes + ----- + License: https://www.humanconnectome.org/study/hcp-young-adult/document/ + wu-minn-hcp-consortium-open-access-data-use-terms + """ + versions = ['fslr32k'] + if version not in versions: + raise ValueError('The version of Glasser et al., 2016 parcellation ' + 'requested "{}" does not exist. 
Must be one of {}' + .format(version, versions)) + + dataset_name = 'atl-mmpall' + + data_dir = _get_data_dir(data_dir=data_dir) + info = _get_dataset_info(dataset_name)[version] + if url is None: + url = info['url'] + opts = { + 'uncompress': True, + 'md5sum': info['md5'], + 'move': '{}.tar.gz'.format(dataset_name) + } + + hemispheres = ['L', 'R'] + filenames = [ + 'atl-MMPAll_space-{}_hemi-{}_deterministic.label.gii' + .format(version, hemi) for hemi in hemispheres + ] + + files = [(op.join(dataset_name, version, f), url, opts) for f in filenames] + data = _fetch_files(data_dir, files=files, resume=resume, verbose=verbose) + + return SURFACE(*data) + + + +def fetch_pauli2018(data_dir=None, url=None, resume=True, verbose=1): + """ + Download files for Pauli et al., 2018 subcortical parcellation. + + Parameters + ---------- + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + url : str, optional + URL from which to download data. Default: None + resume : bool, optional + Whether to attempt to resume partial download, if possible. Default: + True + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Dictionary-like object with keys ['probabilistic', 'deterministic'], + where corresponding values are filepaths to downloaded atlas files. + + References + ---------- + Pauli, W. M., Nili, A. N., & Tyszka, J. M. (2018). A high-resolution + probabilistic in vivo atlas of human subcortical brain nuclei. Scientific + Data, 5, 180063. + + Notes + ----- + License: CC-BY Attribution 4.0 International + """ + dataset_name = 'atl-pauli2018' + keys = ['probabilistic', 'deterministic', 'info'] + + data_dir = _get_data_dir(data_dir=data_dir) + info = _get_dataset_info(dataset_name) + + # format the query how _fetch_files() wants things and then download data + files = [ + (i['name'], i['url'], dict(md5sum=i['md5'], move=i['name'])) + for i in info + ] + + data = _fetch_files(data_dir, files=files, resume=resume, verbose=verbose) + + return Bunch(**dict(zip(keys, data))) + + +def fetch_ye2020(): + """Fetch Ye et al., 2020 subcortical parcellation.""" + pass + + + +def fetch_voneconomo(data_dir=None, url=None, resume=True, verbose=1): + """ + Fetch von-Economo Koskinas probabilistic FreeSurfer atlas. + + Parameters + ---------- + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + url : str, optional + URL from which to download data. Default: None + resume : bool, optional + Whether to attempt to resume partial download, if possible. Default: + True + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Dictionary-like object with keys ['gcs', 'ctab', 'info'] + + References + ---------- + Scholtens, L. H., de Reus, M. A., de Lange, S. C., Schmidt, R., & van den + Heuvel, M. P. (2018). An MRI von Economo–Koskinas atlas. NeuroImage, 170, + 249-256. 
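+
+    Examples
+    --------
+    Illustrative sketch only; fetching requires network access to the OSF
+    resource listed in ``datasets.json``.
+
+    >>> from netneurotools import datasets
+    >>> vek = datasets.fetch_voneconomo()  # doctest: +SKIP
+    >>> vek['gcs'].lh  # doctest: +SKIP
+    '.../atl-vonEconomoKoskinas_hemi-L_probabilistic.gcs'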
+ + Notes + ----- + License: CC-BY-NC-SA 4.0 + """ + dataset_name = 'atl-voneconomo_koskinas' + keys = ['gcs', 'ctab', 'info'] + + data_dir = _get_data_dir(data_dir=data_dir) + info = _get_dataset_info(dataset_name) + if url is None: + url = info['url'] + opts = { + 'uncompress': True, + 'md5sum': info['md5'], + 'move': '{}.tar.gz'.format(dataset_name) + } + filenames = [ + 'atl-vonEconomoKoskinas_hemi-{}_probabilistic.{}'.format(hemi, suff) + for hemi in ['L', 'R'] for suff in ['gcs', 'ctab'] + ] + ['atl-vonEconomoKoskinas_info.csv'] + files = [(op.join(dataset_name, f), url, opts) for f in filenames] + data = _fetch_files(data_dir, files=files, resume=resume, verbose=verbose) + data = [SURFACE(*data[:-1:2])] + [SURFACE(*data[1:-1:2])] + [data[-1]] + + return Bunch(**dict(zip(keys, data))) diff --git a/netneurotools/datasets/fetch_project.py b/netneurotools/datasets/fetch_project.py new file mode 100644 index 0000000..c0cd94a --- /dev/null +++ b/netneurotools/datasets/fetch_project.py @@ -0,0 +1,221 @@ +"""Functions for fetching project data.""" +import os +import os.path as op +import numpy as np + +try: + # nilearn 0.10.3 + from nilearn.datasets._utils import fetch_files as _fetch_files +except ImportError: + from nilearn.datasets.utils import _fetch_files + +from sklearn.utils import Bunch + +from .datasets_utils import ( + _get_data_dir, _get_dataset_info +) + +from ._mirchi2018 import _get_fc, _get_panas + + +def fetch_vazquez_rodriguez2019(data_dir=None, url=None, resume=True, + verbose=1): + """ + Download files from Vazquez-Rodriguez et al., 2019, PNAS. + + Parameters + ---------- + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + url : str, optional + URL from which to download data. Default: None + resume : bool, optional + Whether to attempt to resume partial download, if possible. Default: + True + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + Returns + ------- + data : :class:`sklearn.utils.Bunch` + Dictionary-like object with keys ['rsquared', 'gradient'] containing + 1000 values from + + References + ---------- + See `ref` key of returned dictionary object for relevant dataset reference + """ + dataset_name = 'ds-vazquez_rodriguez2019' + + data_dir = _get_data_dir(data_dir=data_dir) + info = _get_dataset_info(dataset_name) + if url is None: + url = info['url'] + opts = { + 'uncompress': True, + 'md5sum': info['md5'], + 'move': '{}.tar.gz'.format(dataset_name) + } + + filenames = [ + op.join(dataset_name, 'rsquared_gradient.csv') + ] + data = _fetch_files(data_dir, files=[(f, url, opts) for f in filenames], + resume=resume, verbose=verbose) + + # load data + rsq, grad = np.loadtxt(data[0], delimiter=',', skiprows=1).T + + return Bunch(rsquared=rsq, gradient=grad) + + +def fetch_mirchi2018(data_dir=None, resume=True, verbose=1): + """ + Download (and creates) dataset for replicating Mirchi et al., 2018, SCAN. + + Parameters + ---------- + data_dir : str, optional + Directory to check for existing data files (if they exist) or to save + generated data files. Files should be named mirchi2018_fc.npy and + mirchi2018_panas.csv for the functional connectivity and behavioral + data, respectively. 
+ + Returns + ------- + X : (73, 198135) numpy.ndarray + Functional connections from MyConnectome rsfMRI time series data + Y : (73, 13) numpy.ndarray + PANAS subscales from MyConnectome behavioral data + """ + data_dir = os.path.join(_get_data_dir(data_dir=data_dir), 'ds-mirchi2018') + os.makedirs(data_dir, exist_ok=True) + + X_fname = os.path.join(data_dir, 'myconnectome_fc.npy') + Y_fname = os.path.join(data_dir, 'myconnectome_panas.csv') + + if not os.path.exists(X_fname): + X = _get_fc(data_dir=data_dir, resume=resume, verbose=verbose) + np.save(X_fname, X, allow_pickle=False) + else: + X = np.load(X_fname, allow_pickle=False) + + if not os.path.exists(Y_fname): + Y = _get_panas(data_dir=data_dir, resume=resume, verbose=verbose) + np.savetxt(Y_fname, np.column_stack(list(Y.values())), + header=','.join(Y.keys()), delimiter=',', fmt='%i') + # convert dictionary to structured array before returning + Y = np.array([tuple(row) for row in np.column_stack(list(Y.values()))], + dtype=dict(names=list(Y.keys()), formats=['i8'] * len(Y))) + else: + Y = np.genfromtxt(Y_fname, delimiter=',', names=True, dtype=int) + + return X, Y + + +def fetch_hansen_manynetworks(): + """Download files from Hansen et al., 2023, PLOS Biology.""" + pass + +def fetch_hansen_receptors(): + """Download files from Hansen et al., 2022, Nature Neuroscience.""" + pass + +def fetch_hansen_genecognition(): + """Download files from Hansen et al., 2021, Nature Human Behaviour.""" + pass + +def fetch_hansen_brainstem(): + """Download files from Hansen et al., 2024.""" + pass + +def fetch_shafiei_hcpmeg(): + """Download files from Shafiei et al., 2022 & Shafiei et al., 2023.""" + pass + +def fetch_suarez_mami(): + """Download files from Suarez et al., 2022, eLife.""" + pass + + + +def fetch_famous_gmat(dataset, data_dir=None, url=None, resume=True, + verbose=1): + """ + Download files from multi-species connectomes. + + Parameters + ---------- + dataset : str + Specifies which dataset to download; must be one of the datasets listed + in :func:`netneurotools.datasets.available_connectomes()`. + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + url : str, optional + URL from which to download data. Default: None + resume : bool, optional + Whether to attempt to resume partial download, if possible. Default: + True + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + Returns + ------- + data : :class:`sklearn.utils.Bunch` + Dictionary-like object with, at a minimum, keys ['conn', 'labels', + 'ref'] providing connectivity / correlation matrix, region labels, and + relevant reference. 
Other possible keys include 'dist' (an array of + Euclidean distances between regions of 'conn'), 'coords' (an array of + xyz coordinates for regions of 'conn'), 'acronyms' (an array of + acronyms for regions of 'conn'), and 'networks' (an array of network + affiliations for regions of 'conn') + + References + ---------- + See `ref` key of returned dictionary object for relevant dataset reference + """ + available_connectomes = sorted(_get_dataset_info('ds-famous-gmat').keys()) + + if dataset not in available_connectomes: + raise ValueError('Provided dataset {} not available; must be one of {}' + .format(dataset, available_connectomes)) + + dataset_name = 'ds-famous-gmat' + + data_dir = op.join(_get_data_dir(data_dir=data_dir), dataset_name) + info = _get_dataset_info(dataset_name)[dataset] + if url is None: + url = info['url'] + opts = { + 'uncompress': True, + 'md5sum': info['md5'], + 'move': '{}.tar.gz'.format(dataset) + } + + filenames = [ + op.join(dataset, '{}.csv'.format(fn)) for fn in info['keys'] + ] + [op.join(dataset, 'ref.txt')] + data = _fetch_files(data_dir, files=[(f, url, opts) for f in filenames], + resume=resume, verbose=verbose) + + # load data + for n, arr in enumerate(data[:-1]): + try: + data[n] = np.loadtxt(arr, delimiter=',') + except ValueError: + data[n] = np.loadtxt(arr, delimiter=',', dtype=str) + with open(data[-1]) as src: + data[-1] = src.read().strip() + + return Bunch(**dict(zip(info['keys'] + ['ref'], data))) + + +def fetch_neurosynth(): + """Download Neurosynth data.""" + pass diff --git a/netneurotools/datasets/fetch_template.py b/netneurotools/datasets/fetch_template.py new file mode 100644 index 0000000..9a88369 --- /dev/null +++ b/netneurotools/datasets/fetch_template.py @@ -0,0 +1,353 @@ +"""Functions for fetching template data.""" + + +import json +import os.path as op + +try: + # nilearn 0.10.3 + from nilearn.datasets._utils import fetch_files as _fetch_files +except ImportError: + from nilearn.datasets.utils import _fetch_files + +from sklearn.utils import Bunch + +from .datasets_utils import ( + SURFACE, + _get_data_dir, _get_dataset_info, _check_freesurfer_subjid +) + + +def fetch_fsaverage(version='fsaverage', data_dir=None, url=None, resume=True, + verbose=1): + """ + Download files for fsaverage FreeSurfer template. + + Parameters + ---------- + version : str, optional + One of {'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', + 'fsaverage6'}. Default: 'fsaverage' + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + url : str, optional + URL from which to download data. Default: None + resume : bool, optional + Whether to attempt to resume partial download, if possible. Default: + True + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Dictionary-like object with keys ['surf'] where corresponding values + are length-2 lists downloaded template files (each list composed of + files for the left and right hemisphere). + """ + versions = [ + 'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', 'fsaverage6' + ] + if version not in versions: + raise ValueError('The version of fsaverage requested "{}" does not ' + 'exist. 
Must be one of {}'.format(version, versions)) + + dataset_name = 'tpl-fsaverage' + keys = ['orig', 'white', 'smoothwm', 'pial', 'inflated', 'sphere'] + + data_dir = _get_data_dir(data_dir=data_dir) + info = _get_dataset_info(dataset_name)[version] + if url is None: + url = info['url'] + + opts = { + 'uncompress': True, + 'md5sum': info['md5'], + 'move': '{}.tar.gz'.format(dataset_name) + } + + filenames = [ + op.join(version, 'surf', '{}.{}'.format(hemi, surf)) + for surf in keys for hemi in ['lh', 'rh'] + ] + + try: + data_dir = _check_freesurfer_subjid(version)[1] + data = [op.join(data_dir, f) for f in filenames] + except FileNotFoundError: + data = _fetch_files(data_dir, resume=resume, verbose=verbose, + files=[(op.join(dataset_name, f), url, opts) + for f in filenames]) + + data = [SURFACE(*data[i:i + 2]) for i in range(0, len(keys) * 2, 2)] + + return Bunch(**dict(zip(keys, data))) + + + + +def fetch_hcp_standards(data_dir=None, url=None, resume=True, verbose=1): + """ + Fetch HCP standard mesh atlases for converting between FreeSurfer and HCP. + + Parameters + ---------- + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + url : str, optional + URL from which to download data. Default: None + resume : bool, optional + Whether to attempt to resume partial download, if possible. Default: + True + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + Returns + ------- + standards : str + Filepath to standard_mesh_atlases directory + """ + if url is None: + url = 'https://web.archive.org/web/20220121035833/' + \ + 'http://brainvis.wustl.edu/workbench/standard_mesh_atlases.zip' + dataset_name = 'standard_mesh_atlases' + data_dir = _get_data_dir(data_dir=data_dir) + opts = { + 'uncompress': True, + 'move': '{}.zip'.format(dataset_name) + } + filenames = [ + 'L.sphere.32k_fs_LR.surf.gii', 'R.sphere.32k_fs_LR.surf.gii' + ] + files = [(op.join(dataset_name, f), url, opts) for f in filenames] + _fetch_files(data_dir, files=files, resume=resume, verbose=verbose) + + return op.join(data_dir, dataset_name) + +def fetch_civet(density='41k', version='v1', data_dir=None, url=None, + resume=True, verbose=1): + """ + Fetch CIVET surface files. + + Parameters + ---------- + density : {'41k', '164k'}, optional + Which density of the CIVET-space geometry files to fetch. The + high-resolution '164k' surface only exists for version 'v2' + version : {'v1, 'v2'}, optional + Which version of the CIVET surfaces to use. Default: 'v2' + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + url : str, optional + URL from which to download data. Default: None + resume : bool, optional + Whether to attempt to resume partial download, if possible. Default: + True + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Dictionary-like object with keys ['mid', 'white'] containing geometry + files for CIVET surface. Note for version 'v1' the 'mid' and 'white' + files are identical. + + References + ---------- + Y. Ad-Dab’bagh, O. Lyttelton, J.-S. Muehlboeck, C. Lepage, D. Einarson, K. + Mok, O. Ivanov, R. Vincent, J. Lerch, E. Fombonne, A. C. 
Evans, The CIVET + image-processing environment: A fully automated comprehensive pipeline for + anatomical neuroimaging research. Proceedings of the 12th Annual Meeting of + the Organization for Human Brain Mapping (2006). + + Notes + ----- + License: https://github.com/aces/CIVET_Full_Project/blob/master/LICENSE + """ + densities = ['41k', '164k'] + if density not in densities: + raise ValueError('The density of CIVET requested "{}" does not exist. ' + 'Must be one of {}'.format(density, densities)) + versions = ['v1', 'v2'] + if version not in versions: + raise ValueError('The version of CIVET requested "{}" does not exist. ' + 'Must be one of {}'.format(version, versions)) + + if version == 'v1' and density == '164k': + raise ValueError('The "164k" density CIVET surface only exists for ' + 'version "v2"') + + dataset_name = 'tpl-civet' + keys = ['mid', 'white'] + + data_dir = _get_data_dir(data_dir=data_dir) + info = _get_dataset_info(dataset_name)[version]['civet{}'.format(density)] + if url is None: + url = info['url'] + + opts = { + 'uncompress': True, + 'md5sum': info['md5'], + 'move': '{}.tar.gz'.format(dataset_name) + } + filenames = [ + op.join(dataset_name, version, 'civet{}'.format(density), + 'tpl-civet_space-ICBM152_hemi-{}_den-{}_{}.obj' + .format(hemi, density, surf)) + for surf in keys for hemi in ['L', 'R'] + ] + + data = _fetch_files(data_dir, resume=resume, verbose=verbose, + files=[(f, url, opts) for f in filenames]) + + data = [SURFACE(*data[i:i + 2]) for i in range(0, len(keys) * 2, 2)] + + return Bunch(**dict(zip(keys, data))) + + +def fetch_conte69(data_dir=None, url=None, resume=True, verbose=1): + """ + Download files for Van Essen et al., 2012 Conte69 template. + + Parameters + ---------- + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + url : str, optional + URL from which to download data. Default: None + resume : bool, optional + Whether to attempt to resume partial download, if possible. Default: + True + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Dictionary-like object with keys ['midthickness', 'inflated', + 'vinflated'], where corresponding values are lists of filepaths to + downloaded template files. + + References + ---------- + http://brainvis.wustl.edu/wiki/index.php//Caret:Atlases/Conte69_Atlas + + Van Essen, D. C., Glasser, M. F., Dierker, D. L., Harwell, J., & Coalson, + T. (2011). Parcellations and hemispheric asymmetries of human cerebral + cortex analyzed on surface-based atlases. Cerebral cortex, 22(10), + 2241-2262. + + Notes + ----- + License: ??? 
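+
+    Examples
+    --------
+    A minimal usage sketch: the template files are downloaded on the first
+    call (network access required) and the exact paths returned depend on
+    the local data directory, so the path shown below is illustrative only.
+
+    >>> from netneurotools import datasets
+    >>> conte = datasets.fetch_conte69()  # doctest: +SKIP
+    >>> conte['midthickness'].lh  # doctest: +SKIP
+    '.../tpl-conte69_space-MNI305_variant-fsLR32k_midthickness.L.surf.gii'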
+ """ + dataset_name = 'tpl-conte69' + keys = ['midthickness', 'inflated', 'vinflated'] + + data_dir = _get_data_dir(data_dir=data_dir) + info = _get_dataset_info(dataset_name) + if url is None: + url = info['url'] + + opts = { + 'uncompress': True, + 'md5sum': info['md5'], + 'move': '{}.tar.gz'.format(dataset_name) + } + + filenames = [ + 'tpl-conte69/tpl-conte69_space-MNI305_variant-fsLR32k_{}.{}.surf.gii' + .format(res, hemi) for res in keys for hemi in ['L', 'R'] + ] + ['tpl-conte69/template_description.json'] + + data = _fetch_files(data_dir, files=[(f, url, opts) for f in filenames], + resume=resume, verbose=verbose) + + with open(data[-1], 'r') as src: + data[-1] = json.load(src) + + # bundle hemispheres together + data = [SURFACE(*data[:-1][i:i + 2]) for i in range(0, 6, 2)] + [data[-1]] + + return Bunch(**dict(zip(keys + ['info'], data))) + + +def fetch_yerkes19(data_dir=None, url=None, resume=None, verbose=1): + """ + Download files for Donahue et al., 2016 Yerkes19 template. + + Parameters + ---------- + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + url : str, optional + URL from which to download data. Default: None + resume : bool, optional + Whether to attempt to resume partial download, if possible. Default: + True + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Dictionary-like object with keys ['midthickness', 'inflated', + 'vinflated'], where corresponding values are lists of filepaths to + downloaded template files. + + References + ---------- + https://balsa.wustl.edu/reference/show/976nz + + Donahue, C. J., Sotiropoulos, S. N., Jbabdi, S., Hernandez-Fernandez, M., + Behrens, T. E., Dyrby, T. B., ... & Glasser, M. F. (2016). Using diffusion + tractography to predict cortical connection strength and distance: a + quantitative comparison with tracers in the monkey. Journal of + Neuroscience, 36(25), 6758-6770. + + Notes + ----- + License: ??? 
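+
+    Examples
+    --------
+    A minimal usage sketch along the same lines as ``fetch_conte69``; the
+    exact paths returned depend on the local data directory, so the path
+    shown below is illustrative only.
+
+    >>> from netneurotools import datasets
+    >>> yerkes = datasets.fetch_yerkes19()  # doctest: +SKIP
+    >>> yerkes['inflated'].rh  # doctest: +SKIP
+    '.../tpl-yerkes19_space-fsLR32k_inflated.R.surf.gii'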
+ """ + dataset_name = 'tpl-yerkes19' + keys = ['midthickness', 'inflated', 'vinflated'] + + data_dir = _get_data_dir(data_dir=data_dir) + info = _get_dataset_info(dataset_name) + if url is None: + url = info['url'] + + opts = { + 'uncompress': True, + 'md5sum': info['md5'], + 'move': '{}.tar.gz'.format(dataset_name) + } + + filenames = [ + 'tpl-yerkes19/tpl-yerkes19_space-fsLR32k_{}.{}.surf.gii' + .format(res, hemi) for res in keys for hemi in ['L', 'R'] + ] + + data = _fetch_files(data_dir, files=[(f, url, opts) for f in filenames], + resume=resume, verbose=verbose) + + # bundle hemispheres together + data = [SURFACE(*data[i:i + 2]) for i in range(0, 6, 2)] + + return Bunch(**dict(zip(keys + ['info'], data))) diff --git a/netneurotools/datasets/fetchers.py b/netneurotools/datasets/fetchers.py deleted file mode 100644 index b2fa95d..0000000 --- a/netneurotools/datasets/fetchers.py +++ /dev/null @@ -1,882 +0,0 @@ -# -*- coding: utf-8 -*- -"""Functions for fetching datasets from the internet.""" - -from collections import namedtuple -import itertools -import json -import os.path as op -import warnings - -try: - # nilearn 0.10.3 - from nilearn.datasets._utils import fetch_files as _fetch_files -except ImportError: - from nilearn.datasets.utils import _fetch_files - -import numpy as np -from sklearn.utils import Bunch - -from .utils import _get_data_dir, _get_dataset_info -from ..utils import check_fs_subjid - -SURFACE = namedtuple('Surface', ('lh', 'rh')) - - -def fetch_cammoun2012(version='MNI152NLin2009aSym', data_dir=None, url=None, - resume=True, verbose=1): - """ - Download files for Cammoun et al., 2012 multiscale parcellation. - - Parameters - ---------- - version : str, optional - Specifies which version of the dataset to download, where - 'MNI152NLin2009aSym' will return .nii.gz atlas files defined in MNI152 - space, 'fsaverageX' will return .annot files defined in fsaverageX - space (FreeSurfer 6.0.1), 'fslr32k' will return .label.gii files in - fs_LR_32k HCP space, and 'gcs' will return FreeSurfer-style .gcs - probabilistic atlas files for generating new, subject-specific - parcellations. Default: 'MNI152NLin2009aSym' - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. - Default: 1 - - Returns - ------- - filenames : :class:`sklearn.utils.Bunch` - Dictionary-like object with keys ['scale033', 'scale060', 'scale125', - 'scale250', 'scale500'], where corresponding values are lists of - filepaths to downloaded parcellation files. - - References - ---------- - Cammoun, L., Gigandet, X., Meskaldji, D., Thiran, J. P., Sporns, O., Do, K. - Q., Maeder, P., and Meuli, R., & Hagmann, P. (2012). Mapping the human - connectome at multiple scales with diffusion spectrum MRI. Journal of - Neuroscience Methods, 203(2), 386-397. - - Notes - ----- - License: https://raw.githubusercontent.com/LTS5/cmp/master/COPYRIGHT - """ - if version == 'surface': - warnings.warn('Providing `version="surface"` is deprecated and will ' - 'be removed in a future release. 
For consistent ' - 'behavior please use `version="fsaverage"` instead.', - DeprecationWarning, stacklevel=2) - version = 'fsaverage' - elif version == 'volume': - warnings.warn('Providing `version="volume"` is deprecated and will ' - 'be removed in a future release. For consistent ' - 'behavior please use `version="MNI152NLin2009aSym"` ' - 'instead.', - DeprecationWarning, stacklevel=2) - version = 'MNI152NLin2009aSym' - - versions = [ - 'gcs', 'fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k', - 'MNI152NLin2009aSym' - ] - if version not in versions: - raise ValueError('The version of Cammoun et al., 2012 parcellation ' - 'requested "{}" does not exist. Must be one of {}' - .format(version, versions)) - - dataset_name = 'atl-cammoun2012' - keys = ['scale033', 'scale060', 'scale125', 'scale250', 'scale500'] - - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name)[version] - if url is None: - url = info['url'] - - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset_name) - } - - # filenames differ based on selected version of dataset - if version == 'MNI152NLin2009aSym': - filenames = [ - 'atl-Cammoun2012_space-MNI152NLin2009aSym_res-{}_deterministic{}' - .format(res[-3:], suff) for res in keys for suff in ['.nii.gz'] - ] + ['atl-Cammoun2012_space-MNI152NLin2009aSym_info.csv'] - elif version == 'fslr32k': - filenames = [ - 'atl-Cammoun2012_space-fslr32k_res-{}_hemi-{}_deterministic{}' - .format(res[-3:], hemi, suff) for res in keys - for hemi in ['L', 'R'] for suff in ['.label.gii'] - ] - elif version in ('fsaverage', 'fsaverage5', 'fsaverage6'): - filenames = [ - 'atl-Cammoun2012_space-{}_res-{}_hemi-{}_deterministic{}' - .format(version, res[-3:], hemi, suff) for res in keys - for hemi in ['L', 'R'] for suff in ['.annot'] - ] - else: - filenames = [ - 'atl-Cammoun2012_res-{}_hemi-{}_probabilistic{}' - .format(res[5:], hemi, suff) - for res in keys[:-1] + ['scale500v1', 'scale500v2', 'scale500v3'] - for hemi in ['L', 'R'] for suff in ['.gcs', '.ctab'] - ] - - files = [ - (op.join(dataset_name, version, f), url, opts) for f in filenames - ] - data = _fetch_files(data_dir, files=files, resume=resume, verbose=verbose) - - if version == 'MNI152NLin2009aSym': - keys += ['info'] - elif version in ('fslr32k', 'fsaverage', 'fsaverage5', 'fsaverage6'): - data = [SURFACE(*data[i:i + 2]) for i in range(0, len(data), 2)] - else: - data = [data[::2][i:i + 2] for i in range(0, len(data) // 2, 2)] - # deal with the fact that last scale is split into three files :sigh: - data = data[:-3] + [list(itertools.chain.from_iterable(data[-3:]))] - - return Bunch(**dict(zip(keys, data))) - - -def fetch_conte69(data_dir=None, url=None, resume=True, verbose=1): - """ - Download files for Van Essen et al., 2012 Conte69 template. - - Parameters - ---------- - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. 
- Default: 1 - - Returns - ------- - filenames : :class:`sklearn.utils.Bunch` - Dictionary-like object with keys ['midthickness', 'inflated', - 'vinflated'], where corresponding values are lists of filepaths to - downloaded template files. - - References - ---------- - http://brainvis.wustl.edu/wiki/index.php//Caret:Atlases/Conte69_Atlas - - Van Essen, D. C., Glasser, M. F., Dierker, D. L., Harwell, J., & Coalson, - T. (2011). Parcellations and hemispheric asymmetries of human cerebral - cortex analyzed on surface-based atlases. Cerebral cortex, 22(10), - 2241-2262. - - Notes - ----- - License: ??? - """ - dataset_name = 'tpl-conte69' - keys = ['midthickness', 'inflated', 'vinflated'] - - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name) - if url is None: - url = info['url'] - - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset_name) - } - - filenames = [ - 'tpl-conte69/tpl-conte69_space-MNI305_variant-fsLR32k_{}.{}.surf.gii' - .format(res, hemi) for res in keys for hemi in ['L', 'R'] - ] + ['tpl-conte69/template_description.json'] - - data = _fetch_files(data_dir, files=[(f, url, opts) for f in filenames], - resume=resume, verbose=verbose) - - with open(data[-1], 'r') as src: - data[-1] = json.load(src) - - # bundle hemispheres together - data = [SURFACE(*data[:-1][i:i + 2]) for i in range(0, 6, 2)] + [data[-1]] - - return Bunch(**dict(zip(keys + ['info'], data))) - - -def fetch_yerkes19(data_dir=None, url=None, resume=None, verbose=1): - """ - Download files for Donahue et al., 2016 Yerkes19 template. - - Parameters - ---------- - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. - Default: 1 - - Returns - ------- - filenames : :class:`sklearn.utils.Bunch` - Dictionary-like object with keys ['midthickness', 'inflated', - 'vinflated'], where corresponding values are lists of filepaths to - downloaded template files. - - References - ---------- - https://balsa.wustl.edu/reference/show/976nz - - Donahue, C. J., Sotiropoulos, S. N., Jbabdi, S., Hernandez-Fernandez, M., - Behrens, T. E., Dyrby, T. B., ... & Glasser, M. F. (2016). Using diffusion - tractography to predict cortical connection strength and distance: a - quantitative comparison with tracers in the monkey. Journal of - Neuroscience, 36(25), 6758-6770. - - Notes - ----- - License: ??? 
- """ - dataset_name = 'tpl-yerkes19' - keys = ['midthickness', 'inflated', 'vinflated'] - - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name) - if url is None: - url = info['url'] - - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset_name) - } - - filenames = [ - 'tpl-yerkes19/tpl-yerkes19_space-fsLR32k_{}.{}.surf.gii' - .format(res, hemi) for res in keys for hemi in ['L', 'R'] - ] - - data = _fetch_files(data_dir, files=[(f, url, opts) for f in filenames], - resume=resume, verbose=verbose) - - # bundle hemispheres together - data = [SURFACE(*data[i:i + 2]) for i in range(0, 6, 2)] - - return Bunch(**dict(zip(keys + ['info'], data))) - - -def fetch_pauli2018(data_dir=None, url=None, resume=True, verbose=1): - """ - Download files for Pauli et al., 2018 subcortical parcellation. - - Parameters - ---------- - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. - Default: 1 - - Returns - ------- - filenames : :class:`sklearn.utils.Bunch` - Dictionary-like object with keys ['probabilistic', 'deterministic'], - where corresponding values are filepaths to downloaded atlas files. - - References - ---------- - Pauli, W. M., Nili, A. N., & Tyszka, J. M. (2018). A high-resolution - probabilistic in vivo atlas of human subcortical brain nuclei. Scientific - Data, 5, 180063. - - Notes - ----- - License: CC-BY Attribution 4.0 International - """ - dataset_name = 'atl-pauli2018' - keys = ['probabilistic', 'deterministic', 'info'] - - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name) - - # format the query how _fetch_files() wants things and then download data - files = [ - (i['name'], i['url'], dict(md5sum=i['md5'], move=i['name'])) - for i in info - ] - - data = _fetch_files(data_dir, files=files, resume=resume, verbose=verbose) - - return Bunch(**dict(zip(keys, data))) - - -def fetch_fsaverage(version='fsaverage', data_dir=None, url=None, resume=True, - verbose=1): - """ - Download files for fsaverage FreeSurfer template. - - Parameters - ---------- - version : str, optional - One of {'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', - 'fsaverage6'}. Default: 'fsaverage' - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. - Default: 1 - - Returns - ------- - filenames : :class:`sklearn.utils.Bunch` - Dictionary-like object with keys ['surf'] where corresponding values - are length-2 lists downloaded template files (each list composed of - files for the left and right hemisphere). 
- """ - versions = [ - 'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', 'fsaverage6' - ] - if version not in versions: - raise ValueError('The version of fsaverage requested "{}" does not ' - 'exist. Must be one of {}'.format(version, versions)) - - dataset_name = 'tpl-fsaverage' - keys = ['orig', 'white', 'smoothwm', 'pial', 'inflated', 'sphere'] - - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name)[version] - if url is None: - url = info['url'] - - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset_name) - } - - filenames = [ - op.join(version, 'surf', '{}.{}'.format(hemi, surf)) - for surf in keys for hemi in ['lh', 'rh'] - ] - - try: - data_dir = check_fs_subjid(version)[1] - data = [op.join(data_dir, f) for f in filenames] - except FileNotFoundError: - data = _fetch_files(data_dir, resume=resume, verbose=verbose, - files=[(op.join(dataset_name, f), url, opts) - for f in filenames]) - - data = [SURFACE(*data[i:i + 2]) for i in range(0, len(keys) * 2, 2)] - - return Bunch(**dict(zip(keys, data))) - - -def available_connectomes(): - """ - List datasets available via :func:`~.fetch_connectome`. - - Returns - ------- - datasets : list of str - List of available datasets - """ - return sorted(_get_dataset_info('ds-connectomes').keys()) - - -def fetch_connectome(dataset, data_dir=None, url=None, resume=True, - verbose=1): - """ - Download files from multi-species connectomes. - - Parameters - ---------- - dataset : str - Specifies which dataset to download; must be one of the datasets listed - in :func:`netneurotools.datasets.available_connectomes()`. - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. - Default: 1 - - Returns - ------- - data : :class:`sklearn.utils.Bunch` - Dictionary-like object with, at a minimum, keys ['conn', 'labels', - 'ref'] providing connectivity / correlation matrix, region labels, and - relevant reference. 
Other possible keys include 'dist' (an array of - Euclidean distances between regions of 'conn'), 'coords' (an array of - xyz coordinates for regions of 'conn'), 'acronyms' (an array of - acronyms for regions of 'conn'), and 'networks' (an array of network - affiliations for regions of 'conn') - - References - ---------- - See `ref` key of returned dictionary object for relevant dataset reference - """ - if dataset not in available_connectomes(): - raise ValueError('Provided dataset {} not available; must be one of {}' - .format(dataset, available_connectomes())) - - dataset_name = 'ds-connectomes' - - data_dir = op.join(_get_data_dir(data_dir=data_dir), dataset_name) - info = _get_dataset_info(dataset_name)[dataset] - if url is None: - url = info['url'] - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset) - } - - filenames = [ - op.join(dataset, '{}.csv'.format(fn)) for fn in info['keys'] - ] + [op.join(dataset, 'ref.txt')] - data = _fetch_files(data_dir, files=[(f, url, opts) for f in filenames], - resume=resume, verbose=verbose) - - # load data - for n, arr in enumerate(data[:-1]): - try: - data[n] = np.loadtxt(arr, delimiter=',') - except ValueError: - data[n] = np.loadtxt(arr, delimiter=',', dtype=str) - with open(data[-1]) as src: - data[-1] = src.read().strip() - - return Bunch(**dict(zip(info['keys'] + ['ref'], data))) - - -def fetch_vazquez_rodriguez2019(data_dir=None, url=None, resume=True, - verbose=1): - """ - Download files from Vazquez-Rodriguez et al., 2019, PNAS. - - Parameters - ---------- - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. - Default: 1 - - Returns - ------- - data : :class:`sklearn.utils.Bunch` - Dictionary-like object with keys ['rsquared', 'gradient'] containing - 1000 values from - - References - ---------- - See `ref` key of returned dictionary object for relevant dataset reference - """ - dataset_name = 'ds-vazquez_rodriguez2019' - - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name) - if url is None: - url = info['url'] - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset_name) - } - - filenames = [ - op.join(dataset_name, 'rsquared_gradient.csv') - ] - data = _fetch_files(data_dir, files=[(f, url, opts) for f in filenames], - resume=resume, verbose=verbose) - - # load data - rsq, grad = np.loadtxt(data[0], delimiter=',', skiprows=1).T - - return Bunch(rsquared=rsq, gradient=grad) - - -def fetch_schaefer2018(version='fsaverage', data_dir=None, url=None, - resume=True, verbose=1): - """ - Download FreeSurfer .annot files for Schaefer et al., 2018 parcellation. - - Parameters - ---------- - version : {'fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k'} - Specifies which surface annotation files should be matched to. Default: - 'fsaverage' - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. 
Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. - Default: 1 - - Returns - ------- - filenames : :class:`sklearn.utils.Bunch` - Dictionary-like object with keys of format '{}Parcels{}Networks' where - corresponding values are the left/right hemisphere annotation files - - References - ---------- - Schaefer, A., Kong, R., Gordon, E. M., Laumann, T. O., Zuo, X. N., Holmes, - A. J., ... & Yeo, B. T. (2017). Local-global parcellation of the human - cerebral cortex from intrinsic functional connectivity MRI. Cerebral - Cortex, 28(9), 3095-3114. - - Notes - ----- - License: https://github.com/ThomasYeoLab/CBIG/blob/master/LICENSE.md - """ - versions = ['fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k'] - if version not in versions: - raise ValueError('The version of Schaefer et al., 2018 parcellation ' - 'requested "{}" does not exist. Must be one of {}' - .format(version, versions)) - - dataset_name = 'atl-schaefer2018' - keys = [ - '{}Parcels{}Networks'.format(p, n) - for p in range(100, 1001, 100) for n in [7, 17] - ] - - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name)[version] - if url is None: - url = info['url'] - - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset_name) - } - - if version == 'fslr32k': - hemispheres, suffix = ['LR'], 'dlabel.nii' - else: - hemispheres, suffix = ['L', 'R'], 'annot' - filenames = [ - 'atl-Schaefer2018_space-{}_hemi-{}_desc-{}_deterministic.{}' - .format(version, hemi, desc, suffix) - for desc in keys for hemi in hemispheres - ] - - files = [(op.join(dataset_name, version, f), url, opts) - for f in filenames] - data = _fetch_files(data_dir, files=files, resume=resume, verbose=verbose) - - if suffix == 'annot': - data = [SURFACE(*data[i:i + 2]) for i in range(0, len(keys) * 2, 2)] - - return Bunch(**dict(zip(keys, data))) - - -def fetch_hcp_standards(data_dir=None, url=None, resume=True, verbose=1): - """ - Fetch HCP standard mesh atlases for converting between FreeSurfer and HCP. - - Parameters - ---------- - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. - Default: 1 - - Returns - ------- - standards : str - Filepath to standard_mesh_atlases directory - """ - if url is None: - url = 'https://web.archive.org/web/20220121035833/' + \ - 'http://brainvis.wustl.edu/workbench/standard_mesh_atlases.zip' - dataset_name = 'standard_mesh_atlases' - data_dir = _get_data_dir(data_dir=data_dir) - opts = { - 'uncompress': True, - 'move': '{}.zip'.format(dataset_name) - } - filenames = [ - 'L.sphere.32k_fs_LR.surf.gii', 'R.sphere.32k_fs_LR.surf.gii' - ] - files = [(op.join(dataset_name, f), url, opts) for f in filenames] - _fetch_files(data_dir, files=files, resume=resume, verbose=verbose) - - return op.join(data_dir, dataset_name) - - -def fetch_mmpall(version='fslr32k', data_dir=None, url=None, resume=True, - verbose=1): - """ - Download .label.gii files for Glasser et al., 2016 MMPAll atlas. 
- - Parameters - ---------- - version : {'fslr32k'} - Specifies which surface annotation files should be matched to. Default: - 'fslr32k' - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. - Default: 1 - - Returns - ------- - filenames : :class:`sklearn.utils.Bunch` - Namedtuple with fields ('lh', 'rh') corresponding to filepaths to - left/right hemisphere parcellation files - - References - ---------- - Glasser, M. F., Coalson, T. S., Robinson, E. C., Hacker, C. D., Harwell, - J., Yacoub, E., ... & Van Essen, D. C. (2016). A multi-modal parcellation - of human cerebral cortex. Nature, 536(7615), 171-178. - - Notes - ----- - License: https://www.humanconnectome.org/study/hcp-young-adult/document/ - wu-minn-hcp-consortium-open-access-data-use-terms - """ - versions = ['fslr32k'] - if version not in versions: - raise ValueError('The version of Glasser et al., 2016 parcellation ' - 'requested "{}" does not exist. Must be one of {}' - .format(version, versions)) - - dataset_name = 'atl-mmpall' - - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name)[version] - if url is None: - url = info['url'] - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset_name) - } - - hemispheres = ['L', 'R'] - filenames = [ - 'atl-MMPAll_space-{}_hemi-{}_deterministic.label.gii' - .format(version, hemi) for hemi in hemispheres - ] - - files = [(op.join(dataset_name, version, f), url, opts) for f in filenames] - data = _fetch_files(data_dir, files=files, resume=resume, verbose=verbose) - - return SURFACE(*data) - - -def fetch_voneconomo(data_dir=None, url=None, resume=True, verbose=1): - """ - Fetch von-Economo Koskinas probabilistic FreeSurfer atlas. - - Parameters - ---------- - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. - Default: 1 - - Returns - ------- - filenames : :class:`sklearn.utils.Bunch` - Dictionary-like object with keys ['gcs', 'ctab', 'info'] - - References - ---------- - Scholtens, L. H., de Reus, M. A., de Lange, S. C., Schmidt, R., & van den - Heuvel, M. P. (2018). An MRI von Economo–Koskinas atlas. NeuroImage, 170, - 249-256. 
- - Notes - ----- - License: CC-BY-NC-SA 4.0 - """ - dataset_name = 'atl-voneconomo_koskinas' - keys = ['gcs', 'ctab', 'info'] - - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name) - if url is None: - url = info['url'] - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset_name) - } - filenames = [ - 'atl-vonEconomoKoskinas_hemi-{}_probabilistic.{}'.format(hemi, suff) - for hemi in ['L', 'R'] for suff in ['gcs', 'ctab'] - ] + ['atl-vonEconomoKoskinas_info.csv'] - files = [(op.join(dataset_name, f), url, opts) for f in filenames] - data = _fetch_files(data_dir, files=files, resume=resume, verbose=verbose) - data = [SURFACE(*data[:-1:2])] + [SURFACE(*data[1:-1:2])] + [data[-1]] - - return Bunch(**dict(zip(keys, data))) - - -def fetch_civet(density='41k', version='v1', data_dir=None, url=None, - resume=True, verbose=1): - """ - Fetch CIVET surface files. - - Parameters - ---------- - density : {'41k', '164k'}, optional - Which density of the CIVET-space geometry files to fetch. The - high-resolution '164k' surface only exists for version 'v2' - version : {'v1, 'v2'}, optional - Which version of the CIVET surfaces to use. Default: 'v2' - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. - Default: 1 - - Returns - ------- - filenames : :class:`sklearn.utils.Bunch` - Dictionary-like object with keys ['mid', 'white'] containing geometry - files for CIVET surface. Note for version 'v1' the 'mid' and 'white' - files are identical. - - References - ---------- - Y. Ad-Dab’bagh, O. Lyttelton, J.-S. Muehlboeck, C. Lepage, D. Einarson, K. - Mok, O. Ivanov, R. Vincent, J. Lerch, E. Fombonne, A. C. Evans, The CIVET - image-processing environment: A fully automated comprehensive pipeline for - anatomical neuroimaging research. Proceedings of the 12th Annual Meeting of - the Organization for Human Brain Mapping (2006). - - Notes - ----- - License: https://github.com/aces/CIVET_Full_Project/blob/master/LICENSE - """ - densities = ['41k', '164k'] - if density not in densities: - raise ValueError('The density of CIVET requested "{}" does not exist. ' - 'Must be one of {}'.format(density, densities)) - versions = ['v1', 'v2'] - if version not in versions: - raise ValueError('The version of CIVET requested "{}" does not exist. 
' - 'Must be one of {}'.format(version, versions)) - - if version == 'v1' and density == '164k': - raise ValueError('The "164k" density CIVET surface only exists for ' - 'version "v2"') - - dataset_name = 'tpl-civet' - keys = ['mid', 'white'] - - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name)[version]['civet{}'.format(density)] - if url is None: - url = info['url'] - - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset_name) - } - filenames = [ - op.join(dataset_name, version, 'civet{}'.format(density), - 'tpl-civet_space-ICBM152_hemi-{}_den-{}_{}.obj' - .format(hemi, density, surf)) - for surf in keys for hemi in ['L', 'R'] - ] - - data = _fetch_files(data_dir, resume=resume, verbose=verbose, - files=[(f, url, opts) for f in filenames]) - - data = [SURFACE(*data[i:i + 2]) for i in range(0, len(keys) * 2, 2)] - - return Bunch(**dict(zip(keys, data))) diff --git a/netneurotools/tests/__init__.py b/netneurotools/datasets/tests/__init__.py similarity index 100% rename from netneurotools/tests/__init__.py rename to netneurotools/datasets/tests/__init__.py diff --git a/netneurotools/datasets/tests/test_datasetsutils.py b/netneurotools/datasets/tests/test_datasetsutils.py new file mode 100644 index 0000000..15e69cd --- /dev/null +++ b/netneurotools/datasets/tests/test_datasetsutils.py @@ -0,0 +1,35 @@ +"""For testing netneurotools.datasets.datasets_utils functionality.""" +import os + +import pytest + +from netneurotools.datasets import datasets_utils as utils + + +@pytest.mark.parametrize('dset, expected', [ + ('atl-cammoun2012', ['fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k', + 'MNI152NLin2009aSym', 'gcs']), + ('tpl-conte69', ['url', 'md5']), + ('atl-pauli2018', ['url', 'md5', 'name']), + ('tpl-fsaverage', ['fsaverage' + f for f in ['', '3', '4', '5', '6']]), + ('atl-schaefer2018', ['fsaverage', 'fsaverage6', 'fsaverage6']) +]) +def test_get_dataset_info(dset, expected): + """Test getting dataset info.""" + info = utils._get_dataset_info(dset) + if isinstance(info, dict): + assert all(k in info.keys() for k in expected) + elif isinstance(info, list): + for f in info: + assert all(k in f.keys() for k in expected) + else: + assert False + + with pytest.raises(KeyError): + utils._get_dataset_info('notvalid') + + +def test_get_data_dir(tmpdir): + """Test getting data directory.""" + data_dir = utils._get_data_dir(tmpdir) + assert os.path.isdir(data_dir) diff --git a/netneurotools/tests/test_datasets.py b/netneurotools/datasets/tests/test_fetch.py similarity index 66% rename from netneurotools/tests/test_datasets.py rename to netneurotools/datasets/tests/test_fetch.py index 0a5af12..adbf983 100644 --- a/netneurotools/tests/test_datasets.py +++ b/netneurotools/datasets/tests/test_fetch.py @@ -1,79 +1,56 @@ -# -*- coding: utf-8 -*- -"""For testing netneurotools.datasets functionality.""" - +"""For testing netneurotools.datasets.fetch_* functionality.""" import os - -import numpy as np import pytest - +import numpy as np from netneurotools import datasets -from netneurotools.datasets import utils - -@pytest.mark.parametrize('corr, size, tol, seed', [ - (0.85, (1000,), 0.05, 1234), - (0.85, (1000, 1000), 0.05, 1234), - ([[1, 0.5, 0.3], [0.5, 1, 0], [0.3, 0, 1]], (1000,), 0.05, 1234) +@pytest.mark.parametrize('version', [ + 'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', 'fsaverage6' ]) -def test_make_correlated_xy(corr, size, tol, seed): - out = datasets.make_correlated_xy(corr=corr, size=size, - tol=tol, 
seed=seed) - # ensure output is expected shape - assert out.shape[1:] == size - assert len(out) == len(corr) if hasattr(corr, '__len__') else 2 - - # check outputs are correlated within specified tolerance - realcorr = np.corrcoef(out.reshape(len(out), -1)) - if len(realcorr) == 2 and not hasattr(corr, '__len__'): - realcorr = realcorr[0, 1] - assert np.all(np.abs(realcorr - corr) < tol) - - # check that seed generates reproducible values - duplicate = datasets.make_correlated_xy(corr=corr, size=size, - tol=tol, seed=seed) - assert np.allclose(out, duplicate) - - -@pytest.mark.parametrize('corr', [ - (1.5), (-1.5), # outside range of [-1, 1] - ([0.85]), ([[0.5, 0.5, 0.5], [0.5, 0.5, 0.5]]), # not 2D / square array - ([[0.85]]), ([[1, 0.5], [0.5, 0.5]]) # diagonal not equal to 1 +def test_fetch_fsaverage(tmpdir, version): + """Test fetching of fsaverage surfaces.""" + fsaverage = datasets.fetch_fsaverage(version=version, data_dir=tmpdir, + verbose=0) + assert all(hasattr(fsaverage, k) + and len(fsaverage[k]) == 2 + and all(os.path.isfile(hemi) + for hemi in fsaverage[k]) for k in + ['orig', 'white', 'smoothwm', 'pial', 'inflated', 'sphere']) + + +def test_fetch_hcp_standards(tmpdir): + """Test fetching of HCP standard meshes.""" + hcp = datasets.fetch_hcp_standards(data_dir=tmpdir, verbose=0) + assert os.path.isdir(hcp) + + +@pytest.mark.parametrize('version', [ + 'v1', 'v2' ]) -def test_make_correlated_xy_errors(corr): - with pytest.raises(ValueError): - datasets.make_correlated_xy(corr) +def test_fetch_civet(tmpdir, version): + """Test fetching of CIVET templates.""" + civet = datasets.fetch_civet(version=version, data_dir=tmpdir, verbose=0) + for key in ('mid', 'white'): + assert key in civet + for hemi in ('lh', 'rh'): + assert hasattr(civet[key], hemi) + assert os.path.isfile(getattr(civet[key], hemi)) def test_fetch_conte69(tmpdir): + """Test fetching of Conte69 surfaces.""" conte = datasets.fetch_conte69(data_dir=tmpdir, verbose=0) assert all(hasattr(conte, k) for k in ['midthickness', 'inflated', 'vinflated', 'info']) def test_fetch_yerkes19(tmpdir): + """Test fetching of Yerkes19 surfaces.""" conte = datasets.fetch_yerkes19(data_dir=tmpdir, verbose=0) assert all(hasattr(conte, k) for k in ['midthickness', 'inflated', 'vinflated']) -def test_fetch_pauli2018(tmpdir): - pauli = datasets.fetch_pauli2018(data_dir=tmpdir, verbose=0) - assert all(hasattr(pauli, k) and os.path.isfile(pauli[k]) for k in - ['probabilistic', 'deterministic', 'info']) - - -@pytest.mark.parametrize('version', [ - 'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', 'fsaverage6' -]) -def test_fetch_fsaverage(tmpdir, version): - fsaverage = datasets.fetch_fsaverage(version=version, data_dir=tmpdir, - verbose=0) - assert all(hasattr(fsaverage, k) - and len(fsaverage[k]) == 2 - and all(os.path.isfile(hemi) - for hemi in fsaverage[k]) for k in - ['orig', 'white', 'smoothwm', 'pial', 'inflated', 'sphere']) - @pytest.mark.parametrize('version, expected', [ ('MNI152NLin2009aSym', [1, 1, 1, 1, 1]), @@ -84,6 +61,7 @@ def test_fetch_fsaverage(tmpdir, version): ('gcs', [2, 2, 2, 2, 6]) ]) def test_fetch_cammoun2012(tmpdir, version, expected): + """Test fetching of Cammoun2012 parcellations.""" keys = ['scale033', 'scale060', 'scale125', 'scale250', 'scale500'] cammoun = datasets.fetch_cammoun2012(version, data_dir=tmpdir, verbose=0) @@ -102,36 +80,11 @@ def test_fetch_cammoun2012(tmpdir, version, expected): datasets.fetch_cammoun2012('surface', data_dir=tmpdir, verbose=0) -@pytest.mark.parametrize('dataset, expected', [ 
- ('celegans', ['conn', 'dist', 'labels', 'ref']), - ('drosophila', ['conn', 'coords', 'labels', 'networks', 'ref']), - ('human_func_scale033', ['conn', 'coords', 'labels', 'ref']), - ('human_func_scale060', ['conn', 'coords', 'labels', 'ref']), - ('human_func_scale125', ['conn', 'coords', 'labels', 'ref']), - ('human_func_scale250', ['conn', 'coords', 'labels', 'ref']), - ('human_func_scale500', ['conn', 'coords', 'labels', 'ref']), - ('human_struct_scale033', ['conn', 'coords', 'dist', 'labels', 'ref']), - ('human_struct_scale060', ['conn', 'coords', 'dist', 'labels', 'ref']), - ('human_struct_scale125', ['conn', 'coords', 'dist', 'labels', 'ref']), - ('human_struct_scale250', ['conn', 'coords', 'dist', 'labels', 'ref']), - ('human_struct_scale500', ['conn', 'coords', 'dist', 'labels', 'ref']), - ('macaque_markov', ['conn', 'dist', 'labels', 'ref']), - ('macaque_modha', ['conn', 'coords', 'dist', 'labels', 'ref']), - ('mouse', ['acronyms', 'conn', 'coords', 'dist', 'labels', 'ref']), - ('rat', ['conn', 'labels', 'ref']), -]) -def test_fetch_connectome(tmpdir, dataset, expected): - connectome = datasets.fetch_connectome(dataset, data_dir=tmpdir, verbose=0) - - for key in expected: - assert (key in connectome) - assert isinstance(connectome[key], str if key == 'ref' else np.ndarray) - - @pytest.mark.parametrize('version', [ 'fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k' ]) def test_fetch_schaefer2018(tmpdir, version): + """Test fetching of Schaefer2018 parcellations.""" keys = [ '{}Parcels{}Networks'.format(p, n) for p in range(100, 1001, 100) for n in [7, 17] @@ -147,58 +100,61 @@ def test_fetch_schaefer2018(tmpdir, version): for k in keys) -def test_fetch_hcp_standards(tmpdir): - hcp = datasets.fetch_hcp_standards(data_dir=tmpdir, verbose=0) - assert os.path.isdir(hcp) - - def test_fetch_mmpall(tmpdir): + """Test fetching of MMPAll parcellations.""" mmp = datasets.fetch_mmpall(data_dir=tmpdir, verbose=0) assert len(mmp) == 2 assert all(os.path.isfile(hemi) for hemi in mmp) assert all(hasattr(mmp, attr) for attr in ('lh', 'rh')) +def test_fetch_pauli2018(tmpdir): + """Test fetching of Pauli2018 parcellations.""" + pauli = datasets.fetch_pauli2018(data_dir=tmpdir, verbose=0) + assert all(hasattr(pauli, k) and os.path.isfile(pauli[k]) for k in + ['probabilistic', 'deterministic', 'info']) + + +@pytest.mark.xfail +def test_fetch_ye2020(tmpdir): + """Test fetching of Ye2020 parcellations.""" + pass + + + def test_fetch_voneconomo(tmpdir): + """Test fetching of von Economo parcellations.""" vek = datasets.fetch_voneconomo(data_dir=tmpdir, verbose=0) assert all(hasattr(vek, k) and len(vek[k]) == 2 for k in ['gcs', 'ctab']) assert isinstance(vek.get('info'), str) -@pytest.mark.parametrize('dset, expected', [ - ('atl-cammoun2012', ['fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k', - 'MNI152NLin2009aSym', 'gcs']), - ('tpl-conte69', ['url', 'md5']), - ('atl-pauli2018', ['url', 'md5', 'name']), - ('tpl-fsaverage', ['fsaverage' + f for f in ['', '3', '4', '5', '6']]), - ('atl-schaefer2018', ['fsaverage', 'fsaverage6', 'fsaverage6']) +@pytest.mark.parametrize('dataset, expected', [ + ('celegans', ['conn', 'dist', 'labels', 'ref']), + ('drosophila', ['conn', 'coords', 'labels', 'networks', 'ref']), + ('human_func_scale033', ['conn', 'coords', 'labels', 'ref']), + ('human_func_scale060', ['conn', 'coords', 'labels', 'ref']), + ('human_func_scale125', ['conn', 'coords', 'labels', 'ref']), + ('human_func_scale250', ['conn', 'coords', 'labels', 'ref']), + ('human_func_scale500', ['conn', 'coords', 
'labels', 'ref']), + ('human_struct_scale033', ['conn', 'coords', 'dist', 'labels', 'ref']), + ('human_struct_scale060', ['conn', 'coords', 'dist', 'labels', 'ref']), + ('human_struct_scale125', ['conn', 'coords', 'dist', 'labels', 'ref']), + ('human_struct_scale250', ['conn', 'coords', 'dist', 'labels', 'ref']), + ('human_struct_scale500', ['conn', 'coords', 'dist', 'labels', 'ref']), + ('macaque_markov', ['conn', 'dist', 'labels', 'ref']), + ('macaque_modha', ['conn', 'coords', 'dist', 'labels', 'ref']), + ('mouse', ['acronyms', 'conn', 'coords', 'dist', 'labels', 'ref']), + ('rat', ['conn', 'labels', 'ref']), ]) -def test_get_dataset_info(dset, expected): - info = utils._get_dataset_info(dset) - if isinstance(info, dict): - assert all(k in info.keys() for k in expected) - elif isinstance(info, list): - for f in info: - assert all(k in f.keys() for k in expected) - else: - assert False +def test_fetch_famous_gmat(tmpdir, dataset, expected): + """Test fetching of famous G.mat datasets.""" + connectome = datasets.fetch_famous_gmat(dataset, data_dir=tmpdir, verbose=0) - with pytest.raises(KeyError): - utils._get_dataset_info('notvalid') + for key in expected: + assert (key in connectome) + assert isinstance(connectome[key], str if key == 'ref' else np.ndarray) -@pytest.mark.parametrize('version', [ - 'v1', 'v2' -]) -def test_fetch_civet(tmpdir, version): - civet = datasets.fetch_civet(version=version, data_dir=tmpdir, verbose=0) - for key in ('mid', 'white'): - assert key in civet - for hemi in ('lh', 'rh'): - assert hasattr(civet[key], hemi) - assert os.path.isfile(getattr(civet[key], hemi)) -def test_get_data_dir(tmpdir): - data_dir = utils._get_data_dir(tmpdir) - assert os.path.isdir(data_dir) diff --git a/netneurotools/datasets/utils.py b/netneurotools/datasets/utils.py deleted file mode 100644 index 4339c57..0000000 --- a/netneurotools/datasets/utils.py +++ /dev/null @@ -1,100 +0,0 @@ -# -*- coding: utf-8 -*- -"""Utilites for loading / creating datasets.""" - -import json -import os -import importlib.resources - -if getattr(importlib.resources, 'files', None) is not None: - _importlib_avail = True -else: - from pkg_resources import resource_filename - _importlib_avail = False - - -def _osfify_urls(data): - """ - Format `data` object with OSF API URL. - - Parameters - ---------- - data : object - If dict with a `url` key, will format OSF_API with relevant values - - Returns - ------- - data : object - Input data with all `url` dict keys formatted - """ - OSF_API = "https://files.osf.io/v1/resources/{}/providers/osfstorage/{}" - - if isinstance(data, str): - return data - elif 'url' in data: - data['url'] = OSF_API.format(*data['url']) - - try: - for key, value in data.items(): - data[key] = _osfify_urls(value) - except AttributeError: - for n, value in enumerate(data): - data[n] = _osfify_urls(value) - - return data - - -if _importlib_avail: - osf = importlib.resources.files("netneurotools") / "data/osf.json" -else: - osf = resource_filename('netneurotools', 'data/osf.json') - -with open(osf) as src: - OSF_RESOURCES = _osfify_urls(json.load(src)) - - -def _get_dataset_info(name): - """ - Return url and MD5 checksum for dataset `name`. - - Parameters - ---------- - name : str - Name of dataset - - Returns - ------- - url : str - URL from which to download dataset - md5 : str - MD5 checksum for file downloade from `url` - """ - try: - return OSF_RESOURCES[name] - except KeyError: - raise KeyError("Provided dataset '{}' is not valid. 
Must be one of: {}" - .format(name, sorted(OSF_RESOURCES.keys()))) from None - - -def _get_data_dir(data_dir=None): - """ - Get path to netneurotools data directory. - - Parameters - ---------- - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - - Returns - ------- - data_dir : str - Path to use as data directory - """ - if data_dir is None: - data_dir = os.environ.get('NNT_DATA', os.path.join('~', 'nnt-data')) - data_dir = os.path.expanduser(data_dir) - if not os.path.exists(data_dir): - os.makedirs(data_dir) - - return data_dir diff --git a/netneurotools/experimental/__init__.py b/netneurotools/experimental/__init__.py new file mode 100644 index 0000000..911c0f7 --- /dev/null +++ b/netneurotools/experimental/__init__.py @@ -0,0 +1,4 @@ +"""Functions in alpha stage.""" + + +__all__ = [] diff --git a/netneurotools/freesurfer.py b/netneurotools/freesurfer.py deleted file mode 100644 index 047590d..0000000 --- a/netneurotools/freesurfer.py +++ /dev/null @@ -1,662 +0,0 @@ -# -*- coding: utf-8 -*- -"""Functions for working with FreeSurfer data and parcellations.""" - -import os -import os.path as op -import warnings - -from nibabel.freesurfer import read_annot, read_geometry -import numpy as np -from scipy import sparse -try: # scipy >= 1.8.0 - from scipy.ndimage._measurements import _stats, labeled_comprehension -except ImportError: # scipy < 1.8.0 - from scipy.ndimage.measurements import _stats, labeled_comprehension -from scipy.spatial.distance import cdist - -from .datasets import fetch_fsaverage -from .stats import gen_spinsamples -from .surface import make_surf_graph -from .utils import check_fs_subjid, run - -FSIGNORE = [ - 'unknown', 'corpuscallosum', 'Background+FreeSurfer_Defined_Medial_Wall' -] - - -def apply_prob_atlas(subject_id, gcs, hemi, *, orig='white', annot=None, - ctab=None, subjects_dir=None, use_cache=True, - quiet=False): - """ - Create an annotation file for `subject_id` by applying atlas in `gcs`. - - Runs subprocess calling FreeSurfer's "mris_ca_label" function; as such, - FreeSurfer must be installed and accesible on the local system path. - - Parameters - ---------- - subject_id : str - FreeSurfer subject ID - gcs : str - Filepath to .gcs file containing classifier array - hemi : {'lh', 'rh'} - Hemisphere corresponding to `gcs` file - orig : str, optional - Original surface to which to apply classifer. Default: 'white' - annot : str, optional - Path to output annotation file to generate. If set to None, the name is - created from the provided `hemi` and `gcs`. If provided as a - relative path, it is assumed to stem from `subjects_dir`/`subject_id`. - Default: None - ctab : str, optional - Path to colortable corresponding to `gcs`. Default: None - subjects_dir : str, optional - Path to FreeSurfer subject directory. If not set, will inherit from - the environmental variable $SUBJECTS_DIR. Default: None - use_cache : bool, optional - Whether to check for existence of `annot` in directory specified by - `{subjects_dir}/{subject_id}/label' and use that, if it exists. If - False, will create a new annot file. Default: True - quiet : bool, optional - Whether to restrict status messages. 
Default: False - - Returns - ------- - annot : str - Path to generated annotation file - """ - cmd = 'mris_ca_label {opts}{subject_id} {hemi} {hemi}.sphere.reg ' \ - '{gcs} {annot}' - - if hemi not in ['rh', 'lh']: - raise ValueError('Provided hemisphere designation `hemi` must be one ' - 'of \'rh\' or \'lh\'. Provided: {}'.format(hemi)) - if not op.isfile(gcs): - raise ValueError('Cannot find specified `gcs` file {}.'.format(gcs)) - - subject_id, subjects_dir = check_fs_subjid(subject_id, subjects_dir) - - # add all the options together, as specified - opts = '' - if ctab is not None and op.isfile(ctab): - opts += '-t {} '.format(ctab) - if orig is not None: - opts += '-orig {} '.format(orig) - if subjects_dir is not None: - opts += '-sdir {} '.format(subjects_dir) - else: - subjects_dir = os.environ['SUBJECTS_DIR'] - - # generate output filename - if annot is None: - base = '{}.{}.annot'.format(hemi, gcs[:-4]) - annot = op.join(subjects_dir, subject_id, 'label', base) - else: - # if not a full path, assume relative from subjects_dir/subject_id - if not annot.startswith(op.abspath(os.sep)): - annot = op.join(subjects_dir, subject_id, annot) - - # if annotation file doesn't exist or we explicitly want to make a new one - if not op.isfile(annot) or not use_cache: - run(cmd.format(opts=opts, subject_id=subject_id, hemi=hemi, - gcs=gcs, annot=annot), - quiet=quiet) - - return annot - - -def _decode_list(vals): - """List decoder.""" - return [val.decode() if hasattr(val, 'decode') else val for val in vals] - - -def find_parcel_centroids(*, lhannot, rhannot, method='surface', - version='fsaverage', surf='sphere', drop=None): - """ - Return vertex coords corresponding to centroids of parcels in annotations. - - Note that using any other `surf` besides the default of 'sphere' may result - in centroids that are not directly within the parcels themselves due to - sulcal folding patterns. - - Parameters - ---------- - {lh,rh}annot : str - Path to .annot file containing labels of parcels on the {left,right} - hemisphere. These must be specified as keyword arguments to avoid - accidental order switching. - method : {'average', 'surface', 'geodesic'}, optional - Method for calculation of parcel centroid. See Notes for more - information. Default: 'surface' - version : str, optional - Specifies which version of `fsaverage` provided annotation files - correspond to. Must be one of {'fsaverage', 'fsaverage3', 'fsaverage4', - 'fsaverage5', 'fsaverage6'}. Default: 'fsaverage' - surf : str, optional - Specifies which surface projection of fsaverage to use for finding - parcel centroids. Default: 'sphere' - drop : list, optional - Specifies regions in {lh,rh}annot for which the parcel centroid should - not be calculated. If not specified, centroids for parcels defined in - `netneurotools.freesurfer.FSIGNORE` are not calculated. Default: None - - Returns - ------- - centroids : (N, 3) numpy.ndarray - xyz coordinates of vertices closest to the centroid of each parcel - defined in `lhannot` and `rhannot` - hemiid : (N,) numpy.ndarray - Array denoting hemisphere designation of coordinates in `centroids`, - where `hemiid=0` denotes the left and `hemiid=1` the right hemisphere - - Notes - ----- - The following methods can be used for finding parcel centroids: - - 1. ``method='average'`` - - Uses the arithmetic mean of the coordinates for the vertices in each - parcel. Note that in this case the calculated centroids will not act - actually fall on the surface of `surf`. - - 2. 
``method='surface'`` - - Calculates the 'average' coordinates and then finds the closest vertex - on `surf`, where closest is defined as the vertex with the minimum - Euclidean distance. - - 3. ``method='geodesic'`` - - Uses the coordinates of the vertex with the minimum average geodesic - distance to all other vertices in the parcel. Note that this is slightly - more time-consuming than the other two methods, especially for - high-resolution meshes. - """ - methods = ['average', 'surface', 'geodesic'] - if method not in methods: - raise ValueError('Provided method for centroid calculation {} is ' - 'invalid. Must be one of {}'.format(methods, methods)) - - if drop is None: - drop = FSIGNORE - drop = _decode_list(drop) - - surfaces = fetch_fsaverage(version)[surf] - - centroids, hemiid = [], [] - for n, (annot, surf) in enumerate(zip([lhannot, rhannot], surfaces)): - vertices, faces = read_geometry(surf) - labels, ctab, names = read_annot(annot) - names = _decode_list(names) - - for lab in np.unique(labels): - if names[lab] in drop: - continue - if method in ['average', 'surface']: - roi = np.atleast_2d(vertices[labels == lab].mean(axis=0)) - if method == 'surface': # find closest vertex on the sphere - roi = vertices[np.argmin(cdist(vertices, roi), axis=0)[0]] - elif method == 'geodesic': - inds, = np.where(labels == lab) - roi = _geodesic_parcel_centroid(vertices, faces, inds) - centroids.append(roi) - hemiid.append(n) - - return np.vstack(centroids), np.asarray(hemiid) - - -def _geodesic_parcel_centroid(vertices, faces, inds): - """ - Calculate parcel centroids based on surface distance. - - Parameters - ---------- - vertices : (N, 3) - Coordinates of vertices defining surface - faces : (F, 3) - Triangular faces defining surface - inds : (R,) - Indices of `vertices` that belong to parcel - - Returns - ------- - roi : (3,) numpy.ndarray - Vertex corresponding to centroid of parcel - """ - mask = np.ones(len(vertices), dtype=bool) - mask[inds] = False - mat = make_surf_graph(vertices, faces, mask=mask) - paths = sparse.csgraph.dijkstra(mat, directed=False, indices=inds)[:, inds] - - # the selected vertex is the one with the minimum average shortest path - # to the other vertices in the parcel - roi = vertices[inds[paths.mean(axis=1).argmin()]] - - return roi - - -def parcels_to_vertices(data, *, lhannot, rhannot, drop=None): - """ - Project parcellated `data` to vertices defined in annotation files. - - Assigns np.nan to all ROIs in `drop` - - Parameters - ---------- - data : (N,) numpy.ndarray - Parcellated data to be projected to vertices. Parcels should be ordered - by [left, right] hemisphere; ordering within hemisphere should - correspond to the provided annotation files. - {lh,rh}annot : str - Path to .annot file containing labels of parcels on the {left,right} - hemisphere. These must be specified as keyword arguments to avoid - accidental order switching. - drop : list, optional - Specifies regions in {lh,rh}annot that are not present in `data`. NaNs - will be inserted in place of the these regions in the returned data. If - not specified, parcels defined in `netneurotools.freesurfer.FSIGNORE` - are assumed to not be present. Default: None - - Returns - ------- - projected : numpy.ndarray - Vertex-level data - """ - if drop is None: - drop = FSIGNORE - drop = _decode_list(drop) - - data = np.vstack(data).astype(float) - - # check this so we're not unduly surprised by anything... 
- n_vert = expected = 0 - for a in [lhannot, rhannot]: - vn, _, names = read_annot(a) - n_vert += len(vn) - names = _decode_list(names) - expected += len(names) - len(set(drop) & set(names)) - if expected != len(data): - raise ValueError('Number of parcels in provided annotation files ' - 'differs from size of parcellated data array.\n' - ' EXPECTED: {} parcels\n' - ' RECEIVED: {} parcels' - .format(expected, len(data))) - - projected = np.zeros((n_vert, data.shape[-1]), dtype=data.dtype) - start = end = n_vert = 0 - for annot in [lhannot, rhannot]: - # read files and update end index for `data` - labels, ctab, names = read_annot(annot) - names = _decode_list(names) - todrop = set(names) & set(drop) - end += len(names) - len(todrop) # unknown and corpuscallosum - - # get indices of unknown and corpuscallosum and insert NaN values - inds = sorted([names.index(f) for f in todrop]) - inds = [f - n for n, f in enumerate(inds)] - currdata = np.insert(data[start:end], inds, np.nan, axis=0) - - # project to vertices and store - projected[n_vert:n_vert + len(labels), :] = currdata[labels] - start = end - n_vert += len(labels) - - return np.squeeze(projected) - - -def vertices_to_parcels(data, *, lhannot, rhannot, drop=None): - """ - Reduce vertex-level `data` to parcels defined in annotation files. - - Takes average of vertices within each parcel, excluding np.nan values - (i.e., np.nanmean). Assigns np.nan to parcels for which all vertices are - np.nan. - - Parameters - ---------- - data : (N,) numpy.ndarray - Vertex-level data to be reduced to parcels - {lh,rh}annot : str - Path to .annot file containing labels to parcels on the {left,right} - hemisphere - drop : list, optional - Specifies regions in {lh,rh}annot that should be removed from the - parcellated version of `data`. If not specified, vertices corresponding - to parcels defined in `netneurotools.freesurfer.FSIGNORE` will be - removed. 
Default: None - - Returns - ------- - reduced : numpy.ndarray - Parcellated `data`, without regions specified in `drop` - """ - if drop is None: - drop = FSIGNORE - drop = _decode_list(drop) - - data = np.vstack(data) - - n_parc = expected = 0 - for a in [lhannot, rhannot]: - vn, _, names = read_annot(a) - expected += len(vn) - names = _decode_list(names) - n_parc += len(names) - len(set(drop) & set(names)) - if expected != len(data): - raise ValueError('Number of vertices in provided annotation files ' - 'differs from size of vertex-level data array.\n' - ' EXPECTED: {} vertices\n' - ' RECEIVED: {} vertices' - .format(expected, len(data))) - - reduced = np.zeros((n_parc, data.shape[-1]), dtype=data.dtype) - start = end = n_parc = 0 - for annot in [lhannot, rhannot]: - # read files and update end index for `data` - labels, ctab, names = read_annot(annot) - names = _decode_list(names) - - indices = np.unique(labels) - end += len(labels) - - for idx in range(data.shape[-1]): - # get average of vertex-level data within parcels - # set all NaN values to 0 before calling `_stats` because we are - # returning sums, so the 0 values won't impact the sums (if we left - # the NaNs then all parcels with even one NaN entry would be NaN) - currdata = np.squeeze(data[start:end, idx]) - isna = np.isnan(currdata) - counts, sums = _stats(np.nan_to_num(currdata), labels, indices) - - # however, we do need to account for the NaN values in the counts - # so that our means are similar to what we'd get from e.g., - # np.nanmean here, our "sums" are the counts of NaN values in our - # parcels - _, nacounts = _stats(isna, labels, indices) - counts = (np.asanyarray(counts, dtype=float) - - np.asanyarray(nacounts, dtype=float)) - - with np.errstate(divide='ignore', invalid='ignore'): - currdata = sums / counts - - # get indices of unkown and corpuscallosum and delete from parcels - inds = sorted([names.index(f) for f in set(drop) & set(names)]) - currdata = np.delete(currdata, inds) - - # store parcellated data - reduced[n_parc:n_parc + len(names) - len(inds), idx] = currdata - - start = end - n_parc += len(names) - len(inds) - - return np.squeeze(reduced) - - -def _get_fsaverage_coords(version='fsaverage', surface='sphere'): - """ - Get vertex coordinates for specified `surface` of fsaverage `version`. - - Parameters - ---------- - version : str, optional - One of {'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', - 'fsaverage6'}. Default: 'fsaverage' - surface : str, optional - Surface for which to return vertex coordinates. Default: 'sphere' - - Returns - ------- - coords : (N, 3) numpy.ndarray - xyz coordinates of vertices for {left,right} hemisphere - hemiid : (N,) numpy.ndarray - Array denoting hemisphere designation of entries in `coords`, where - `hemiid=0` denotes the left and `hemiid=1` the right hemisphere - """ - # get coordinates and hemisphere designation for spin generation - lhsphere, rhsphere = fetch_fsaverage(version)[surface] - coords, hemi = [], [] - for n, sphere in enumerate([lhsphere, rhsphere]): - coords.append(read_geometry(sphere)[0]) - hemi.append(np.ones(len(coords[-1])) * n) - - return np.vstack(coords), np.hstack(hemi) - - -def _get_fsaverage_spins(version='fsaverage', spins=None, n_rotate=1000, - **kwargs): - """ - Generate spatial permutation resamples for fsaverage `version`. - - If `spins` are provided then performs checks to confirm they are valid - - Parameters - ---------- - version : str, optional - Specifies which version of `fsaverage` for which to generate spins. 
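The pair of helpers above, `parcels_to_vertices` and `vertices_to_parcels`, are inverses of one another up to re-averaging. A minimal sketch of that round trip under the pre-reorganization `netneurotools.freesurfer` namespace; the `.annot` paths and the 68-parcel count below are placeholders, not values taken from this patch:

    import numpy as np
    from netneurotools import freesurfer  # module removed later in this patch

    # hypothetical fsaverage annotation files and a matching parcellated map
    lh, rh = 'lh.placeholder.annot', 'rh.placeholder.annot'
    parc = np.random.rand(68)

    # parcel values are copied onto every vertex, then re-averaged back
    vert = freesurfer.parcels_to_vertices(parc, lhannot=lh, rhannot=rh)
    back = freesurfer.vertices_to_parcels(vert, lhannot=lh, rhannot=rh)
    # back.shape == parc.shape; regions in FSIGNORE are skipped on both legs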
- Must be one of {'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', - 'fsaverage6'}. Default: 'fsaverage' - spins : array_like, optional - Pre-computed spins to use instead of generating them on the fly. If not - provided will use other provided parameters to create them. Default: - None - n_rotate : int, optional - Number of rotations to generate. Default: 1000 - return_cost : bool, optional - Whether to return cost array (specified as Euclidean distance) for each - coordinate for each rotation. Currently this option is not supported if - pre-computed `spins` are provided. Default: True - kwargs : key-value pairs - Keyword arguments passed to `netneurotools.stats.gen_spinsamples` - - Returns - ------- - spins : (N, S) numpy.ndarray - Resampling array - """ - if spins is None: - coords, hemiid = _get_fsaverage_coords(version, 'sphere') - spins = gen_spinsamples(coords, hemiid, n_rotate=n_rotate, - **kwargs) - if kwargs.get('return_cost'): - return spins - - spins = np.asarray(spins, dtype='int32') - if spins.shape[-1] != n_rotate: - warnings.warn('Shape of provided `spins` array does not match ' - 'number of rotations requested with `n_rotate`. ' - 'Ignoring specified `n_rotate` parameter and using ' - 'all provided `spins`.', stacklevel=2) - n_rotate = spins.shape[-1] - - return spins, None - - -def spin_data(data, *, lhannot, rhannot, version='fsaverage', n_rotate=1000, - spins=None, drop=None, verbose=False, **kwargs): - """ - Project parcellated `data` to surface, rotates, and re-parcellates. - - Projection to the surface uses `{lh,rh}annot` files. Rotation uses vertex - coordinates from the specified fsaverage `version` and relies on - :func:`netneurotools.stats.gen_spinsamples`. Re-parcellated data will not - be exactly identical to original values due to re-averaging process. - Parcels subsumed by regions in `drop` will be listed as NaN. - - Parameters - ---------- - data : (N,) numpy.ndarray - Parcellated data to be rotated. Parcels should be ordered by [left, - right] hemisphere; ordering within hemisphere should correspond to the - provided `{lh,rh}annot` annotation files. - {lh,rh}annot : str - Path to .annot file containing labels to parcels on the {left,right} - hemisphere - version : str, optional - Specifies which version of `fsaverage` provided annotation files - correspond to. Must be one of {'fsaverage', 'fsaverage3', 'fsaverage4', - 'fsaverage5', 'fsaverage6'}. Default: 'fsaverage' - n_rotate : int, optional - Number of rotations to generate. Default: 1000 - spins : array_like, optional - Pre-computed spins to use instead of generating them on the fly. If not - provided will use other provided parameters to create them. Default: - None - drop : list, optional - Specifies regions in {lh,rh}annot that are not present in `data`. NaNs - will be inserted in place of the these regions in the returned data. If - not specified, parcels defined in `netneurotools.freesurfer.FSIGNORE` - are assumed to not be present. Default: None - verbose : bool, optional - Whether to print occasional status messages. Default: False - kwargs : key-value pairs - Keyword arguments passed to `netneurotools.stats.gen_spinsamples` - - Returns - ------- - rotated : (N, `n_rotate`) numpy.ndarray - Rotated `data - cost : (N, `n_rotate`,) numpy.ndarray - Cost (specified as Euclidean distance) of re-assigning each coordinate - for every rotation in `spinsamples`. Only provided if `return_cost` is - True. 
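`_get_fsaverage_spins` is a thin wrapper around `netneurotools.stats.gen_spinsamples`, which can also be called directly on any set of spherical coordinates. A hedged sketch with synthetic coordinates, assuming `gen_spinsamples` accepts `n_rotate` and `seed` as the docstrings above indicate (node count, seed, and rotation count are arbitrary):

    import numpy as np
    from netneurotools import stats  # provides gen_spinsamples, referenced throughout these docstrings

    rs = np.random.RandomState(1234)
    coords = rs.randn(100, 3)
    coords /= np.linalg.norm(coords, axis=1, keepdims=True)  # points on the unit sphere
    hemiid = np.repeat([0, 1], 50)                           # 0 = left, 1 = right hemisphere

    spins = stats.gen_spinsamples(coords, hemiid, n_rotate=10, seed=1234)
    # spins has shape (100, 10): one resampling of the vertices per rotation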
- """ - if drop is None: - drop = FSIGNORE - - # get coordinates and hemisphere designation for spin generation - vertices = parcels_to_vertices(data, lhannot=lhannot, rhannot=rhannot, - drop=drop) - - # get spins + cost (if requested) - spins, cost = _get_fsaverage_spins(version=version, spins=spins, - n_rotate=n_rotate, - verbose=verbose, **kwargs) - if len(vertices) != len(spins): - raise ValueError('Provided annotation files have a different ' - 'number of vertices than the specified fsaverage ' - 'surface.\n ANNOTATION: {} vertices\n ' - 'FSAVERAGE: {} vertices' - .format(len(vertices), len(spins))) - - spun = np.zeros(data.shape + (n_rotate,)) - for n in range(n_rotate): - if verbose: - msg = f'Reducing vertices to parcels: {n:>5}/{n_rotate}' - print(msg, end='\b' * len(msg), flush=True) - spun[..., n] = vertices_to_parcels(vertices[spins[:, n]], - lhannot=lhannot, rhannot=rhannot, - drop=drop) - - if verbose: - print(' ' * len(msg) + '\b' * len(msg), end='', flush=True) - - if kwargs.get('return_cost'): - return spun, cost - - return spun - - -def spin_parcels(*, lhannot, rhannot, version='fsaverage', n_rotate=1000, - spins=None, drop=None, verbose=False, **kwargs): - """ - Rotate parcels in `{lh,rh}annot` and re-assigns based on maximum overlap. - - Vertex labels are rotated with :func:`netneurotools.stats.gen_spinsamples` - and a new label is assigned to each *parcel* based on the region maximally - overlapping with its boundaries. - - Parameters - ---------- - {lh,rh}annot : str - Path to .annot file containing labels to parcels on the {left,right} - hemisphere - version : str, optional - Specifies which version of `fsaverage` provided annotation files - correspond to. Must be one of {'fsaverage', 'fsaverage3', 'fsaverage4', - 'fsaverage5', 'fsaverage6'}. Default: 'fsaverage' - n_rotate : int, optional - Number of rotations to generate. Default: 1000 - spins : array_like, optional - Pre-computed spins to use instead of generating them on the fly. If not - provided will use other provided parameters to create them. Default: - None - drop : list, optional - Specifies regions in {lh,rh}annot that are not present in `data`. NaNs - will be inserted in place of the these regions in the returned data. If - not specified, parcels defined in `netneurotools.freesurfer.FSIGNORE` - are assumed to not be present. Default: None - seed : {int, np.random.RandomState instance, None}, optional - Seed for random number generation. Default: None - verbose : bool, optional - Whether to print occasional status messages. Default: False - return_cost : bool, optional - Whether to return cost array (specified as Euclidean distance) for each - coordinate for each rotation. Default: True - kwargs : key-value pairs - Keyword arguments passed to `netneurotools.stats.gen_spinsamples` - - Returns - ------- - spinsamples : (N, `n_rotate`) numpy.ndarray - Resampling matrix to use in permuting data parcellated with labels from - {lh,rh}annot, where `N` is the number of parcels. Indices of -1 - indicate that the parcel was completely encompassed by regions in - `drop` and should be ignored. - cost : (N, `n_rotate`,) numpy.ndarray - Cost (specified as Euclidean distance) of re-assigning each coordinate - for every rotation in `spinsamples`. Only provided if `return_cost` is - True. 
- """ - - def overlap(vals): - """Return most common non-negative value in `vals`; -1 if all neg.""" - vals = np.asarray(vals) - vals, counts = np.unique(vals[vals > 0], return_counts=True) - try: - return vals[counts.argmax()] - except ValueError: - return -1 - - if drop is None: - drop = FSIGNORE - drop = _decode_list(drop) - - # get vertex-level labels (set drop labels to - values) - vertices, end = [], 0 - for n, annot in enumerate([lhannot, rhannot]): - labels, ctab, names = read_annot(annot) - names = _decode_list(names) - todrop = set(names) & set(drop) - inds = [names.index(f) - n for n, f in enumerate(todrop)] - labs = np.arange(len(names) - len(inds)) + (end - (len(inds) * n)) - insert = np.arange(-1, -(len(inds) + 1), -1) - vertices.append(np.insert(labs, inds, insert)[labels]) - end += len(names) - vertices = np.hstack(vertices) - labels = np.unique(vertices) - mask = labels > -1 - - # get spins + cost (if requested) - spins, cost = _get_fsaverage_spins(version=version, spins=spins, - n_rotate=n_rotate, verbose=verbose, - **kwargs) - if len(vertices) != len(spins): - raise ValueError('Provided annotation files have a different ' - 'number of vertices than the specified fsaverage ' - 'surface.\n ANNOTATION: {} vertices\n ' - 'FSAVERAGE: {} vertices' - .format(len(vertices), len(spins))) - - # spin and assign regions based on max overlap - regions = np.zeros((len(labels[mask]), n_rotate), dtype='int32') - for n in range(n_rotate): - if verbose: - msg = f'Calculating parcel overlap: {n:>5}/{n_rotate}' - print(msg, end='\b' * len(msg), flush=True) - regions[:, n] = labeled_comprehension(vertices[spins[:, n]], vertices, - labels, overlap, int, -1)[mask] - - if kwargs.get('return_cost'): - return regions, cost - - return regions diff --git a/netneurotools/interface/__init__.py b/netneurotools/interface/__init__.py new file mode 100644 index 0000000..1b474b1 --- /dev/null +++ b/netneurotools/interface/__init__.py @@ -0,0 +1,3 @@ +"""Functions for interfacing with common tools.""" + +__all__ = [] diff --git a/netneurotools/interface/freesurfer.py b/netneurotools/interface/freesurfer.py new file mode 100644 index 0000000..9efc02f --- /dev/null +++ b/netneurotools/interface/freesurfer.py @@ -0,0 +1 @@ +"""Functions for working with FreeSurfer data and parcellations.""" diff --git a/netneurotools/interface/tests/__init__.py b/netneurotools/interface/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/netneurotools/interface/tests/test_freesurfer.py b/netneurotools/interface/tests/test_freesurfer.py new file mode 100644 index 0000000..fcdcd23 --- /dev/null +++ b/netneurotools/interface/tests/test_freesurfer.py @@ -0,0 +1 @@ +"""For testing netneurotools.interface.freesurfer functionality.""" diff --git a/netneurotools/metrics/__init__.py b/netneurotools/metrics/__init__.py new file mode 100644 index 0000000..51d8181 --- /dev/null +++ b/netneurotools/metrics/__init__.py @@ -0,0 +1,66 @@ +"""Magics on networks.""" + + +from .bct import ( + # routing + degrees_und, degrees_dir, + distance_wei_floyd, retrieve_shortest_path, + navigation_wu, get_navigation_path_length, + # diffusion + communicability_bin, communicability_wei, + path_transitivity, search_information, + mean_first_passage_time, diffusion_efficiency, + resource_efficiency_bin, flow_graph, + # other + assortativity, + matching_ind_und, + rich_feeder_peripheral +) + + +from .metrics_utils import ( + _fast_binarize, + _graph_laplacian, +) + + +from .spreading import ( + simulate_atrophy +) + + +from 
.statistical import ( + network_pearsonr, + network_pearsonr_numba, + network_pearsonr_pairwise, + effective_resistance, + network_polarisation, + network_variance, + network_variance_numba, + network_covariance, + network_covariance_numba +) + + +__all__ = [ + # bct + 'degrees_und', 'degrees_dir', + 'distance_wei_floyd', 'retrieve_shortest_path', + 'navigation_wu', 'get_navigation_path_length', + 'communicability_bin', 'communicability_wei', + 'path_transitivity', 'search_information', + 'mean_first_passage_time', 'diffusion_efficiency', + 'resource_efficiency_bin', 'flow_graph', + 'assortativity', 'matching_ind_und', + 'rich_feeder_peripheral', + # metrics_utils + '_fast_binarize', '_graph_laplacian', + # spreading + 'simulate_atrophy', + # statistical + 'network_pearsonr', 'network_pearsonr_numba', + 'network_pearsonr_pairwise', 'effective_resistance', + 'network_polarisation', 'network_variance', + 'network_variance_numba', 'network_covariance', + 'network_covariance_numba' +] diff --git a/netneurotools/metrics.py b/netneurotools/metrics/bct.py similarity index 95% rename from netneurotools/metrics.py rename to netneurotools/metrics/bct.py index 2b8abf2..c98df74 100644 --- a/netneurotools/metrics.py +++ b/netneurotools/metrics/bct.py @@ -1,6 +1,5 @@ -# -*- coding: utf-8 -*- """ -Functions for calculating network metrics. +Functions for calculating brain connectivity metrics. Uses naming conventions adopted from the Brain Connectivity Toolbox (https://sites.google.com/site/bctnet/). @@ -18,27 +17,7 @@ except ImportError: use_numba = False - -def _binarize(W): - """ - Binarize a matrix. - - Parameters - ---------- - W : (N, N) array_like - Matrix to be binarized - - Returns - ------- - binarized : (N, N) numpy.ndarray - Binarized matrix - """ - return (W > 0) * 1 - - -if use_numba: - _binarize = njit(_binarize) - +from .metrics_utils import _fast_binarize def degrees_und(W): """ @@ -56,7 +35,7 @@ def degrees_und(W): deg : (N,) numpy.ndarray Degree of each node in `W` """ - return np.sum(_binarize(W), axis=0) + return np.sum(_fast_binarize(W), axis=0) def degrees_dir(W): @@ -78,13 +57,15 @@ def degrees_dir(W): deg : (N,) numpy.ndarray Degree (in-degree + out-degree) of each node in `W` """ - W_bin = _binarize(W) + W_bin = _fast_binarize(W) deg_in = np.sum(W_bin, axis=0) deg_out = np.sum(W_bin, axis=1) deg = deg_in + deg_out return deg_in, deg_out, deg + + def distance_wei_floyd(D): """ Compute the all-pairs shortest path length using Floyd-Warshall algorithm. @@ -165,178 +146,6 @@ def retrieve_shortest_path(s, t, p_mat): retrieve_shortest_path = njit(retrieve_shortest_path) -def communicability_bin(adjacency, normalize=False): - """ - Compute the communicability of pairs of nodes in `adjacency`. - - Parameters - ---------- - adjacency : (N, N) array_like - Unweighted, direct/undirected connection weight/length array - normalize : bool, optional - Whether to normalize `adjacency` by largest eigenvalue prior to - calculation of communicability metric. Default: False - - Returns - ------- - comm : (N, N) numpy.ndarray - Symmetric array representing communicability of nodes {i, j} - - References - ---------- - Estrada, E., & Hatano, N. (2008). Communicability in complex networks. - Physical Review E, 77(3), 036111. - - Examples - -------- - >>> from netneurotools import metrics - - >>> A = np.array([[1, 0, 1], [0, 1, 1], [1, 0, 1]]) - >>> Q = metrics.communicability_bin(A) - >>> Q - array([[4.19452805, 0. , 3.19452805], - [1.47624622, 2.71828183, 3.19452805], - [3.19452805, 0. 
, 4.19452805]]) - """ - if not np.any(np.logical_or(adjacency == 0, adjacency == 1)): - raise ValueError('Provided adjancecy matrix must be unweighted.') - - # normalize by largest eigenvalue to prevent communicability metric from - # "blowing up" - if normalize: - norm = np.linalg.eigvals(adjacency).max() - adjacency = adjacency / norm - - return scipy.sparse.linalg.expm(adjacency) - - -def communicability_wei(adjacency): - """ - Compute the communicability of pairs of nodes in `adjacency`. - - Parameters - ---------- - adjacency : (N, N) array_like - Weighted, direct/undirected connection weight/length array - - Returns - ------- - cmc : (N, N) numpy.ndarray - Symmetric array representing communicability of nodes {i, j} - - References - ---------- - Crofts, J. J., & Higham, D. J. (2009). A weighted communicability measure - applied to complex brain networks. Journal of the Royal Society Interface, - 6(33), 411-414. - - Examples - -------- - >>> from netneurotools import metrics - - >>> A = np.array([[2, 0, 3], [0, 2, 1], [0.5, 0, 1]]) - >>> Q = metrics.communicability_wei(A) - >>> Q - array([[0. , 0. , 1.93581903], - [0.07810379, 0. , 0.94712177], - [0.32263651, 0. , 0. ]]) - """ - # negative square root of nodal degrees - row_sum = adjacency.sum(1) - neg_sqrt = np.power(row_sum, -0.5) - square_sqrt = np.diag(neg_sqrt) - - # normalize input matrix - for_expm = square_sqrt @ adjacency @ square_sqrt - - # calculate matrix exponential of normalized matrix - cmc = scipy.sparse.linalg.expm(for_expm) - cmc[np.diag_indices_from(cmc)] = 0 - - return cmc - - -def rich_feeder_peripheral(x, sc, stat='median'): - """ - Calculate connectivity values in rich, feeder, and peripheral edges. - - Parameters - ---------- - x : (N, N) numpy.ndarray - Symmetric correlation or connectivity matrix - sc : (N, N) numpy.ndarray - Binary structural connectivity matrix - stat : {'mean', 'median'}, optional - Statistic to use over rich/feeder/peripheral links. Default: 'median' - - Returns - ------- - rfp : (3, k) numpy.ndarray - Array of median rich (0), feeder (1), and peripheral (2) - values, defined by `x`. `k` is the maximum degree defined on `sc`. - pvals : (3, k) numpy.ndarray - p-value for each link, computed using Welch's t-test. - Rich links are compared against non-rich links. Feeder links are - compared against peripheral links. Peripheral links are compared - against feeder links. T-test is one-sided. - - Notes - ----- - This code was written by Justine Hansen who promises to fix and even - optimize the code should any issues arise, provided you let her know. 
- """ - stats = ['mean', 'median'] - if stat not in stats: - raise ValueError(f'Provided stat {stat} not valid.\ - Must be one of {stats}') - - nnodes = len(sc) - mask = np.triu(np.ones(nnodes), 1) > 0 - node_degree = degrees_und(sc) - k = np.max(node_degree).astype(np.int64) - rfp_label = np.zeros((len(sc[mask]), k)) - - for degthresh in range(k): # for each degree threshold - hub_idx = np.where(node_degree >= degthresh) # find the hubs - hub = np.zeros([nnodes, 1]) - hub[hub_idx, :] = 1 - - rfp = np.zeros([nnodes, nnodes]) # for each link, define rfp - for edge1 in range(nnodes): - for edge2 in range(nnodes): - if hub[edge1] + hub[edge2] == 2: - rfp[edge1, edge2] = 1 # rich - if hub[edge1] + hub[edge2] == 1: - rfp[edge1, edge2] = 2 # feeder - if hub[edge1] + hub[edge2] == 0: - rfp[edge1, edge2] = 3 # peripheral - rfp_label[:, degthresh] = rfp[mask] - - rfp = np.zeros([3, k]) - pvals = np.zeros([3, k]) - for degthresh in range(k): - - redfunc = np.median if stat == 'median' else np.mean - for linktype in range(3): - rfp[linktype, degthresh] = redfunc(x[mask][rfp_label[:, degthresh] - == linktype + 1]) - - # p-value (one-sided Welch's t-test) - _, pvals[0, degthresh] = ttest_ind( - x[mask][rfp_label[:, degthresh] == 1], - x[mask][rfp_label[:, degthresh] != 1], - equal_var=False, alternative='greater') - _, pvals[1, degthresh] = ttest_ind( - x[mask][rfp_label[:, degthresh] == 2], - x[mask][rfp_label[:, degthresh] == 3], - equal_var=False, alternative='greater') - _, pvals[2, degthresh] = ttest_ind( - x[mask][rfp_label[:, degthresh] == 3], - x[mask][rfp_label[:, degthresh] == 2], - equal_var=False, alternative='greater') - - return rfp, pvals - def navigation_wu(nav_dist_mat, sc_mat): """ @@ -477,6 +286,158 @@ def get_navigation_path_length(nav_paths, alt_dist_mat): return nav_path_len + +def communicability_bin(adjacency, normalize=False): + """ + Compute the communicability of pairs of nodes in `adjacency`. + + Parameters + ---------- + adjacency : (N, N) array_like + Unweighted, direct/undirected connection weight/length array + normalize : bool, optional + Whether to normalize `adjacency` by largest eigenvalue prior to + calculation of communicability metric. Default: False + + Returns + ------- + comm : (N, N) numpy.ndarray + Symmetric array representing communicability of nodes {i, j} + + References + ---------- + Estrada, E., & Hatano, N. (2008). Communicability in complex networks. + Physical Review E, 77(3), 036111. + + Examples + -------- + >>> from netneurotools import metrics + + >>> A = np.array([[1, 0, 1], [0, 1, 1], [1, 0, 1]]) + >>> Q = metrics.communicability_bin(A) + >>> Q + array([[4.19452805, 0. , 3.19452805], + [1.47624622, 2.71828183, 3.19452805], + [3.19452805, 0. , 4.19452805]]) + """ + if not np.any(np.logical_or(adjacency == 0, adjacency == 1)): + raise ValueError('Provided adjancecy matrix must be unweighted.') + + # normalize by largest eigenvalue to prevent communicability metric from + # "blowing up" + if normalize: + norm = np.linalg.eigvals(adjacency).max() + adjacency = adjacency / norm + + return scipy.sparse.linalg.expm(adjacency) + + +def communicability_wei(adjacency): + """ + Compute the communicability of pairs of nodes in `adjacency`. + + Parameters + ---------- + adjacency : (N, N) array_like + Weighted, direct/undirected connection weight/length array + + Returns + ------- + cmc : (N, N) numpy.ndarray + Symmetric array representing communicability of nodes {i, j} + + References + ---------- + Crofts, J. J., & Higham, D. J. (2009). 
A weighted communicability measure + applied to complex brain networks. Journal of the Royal Society Interface, + 6(33), 411-414. + + Examples + -------- + >>> from netneurotools import metrics + + >>> A = np.array([[2, 0, 3], [0, 2, 1], [0.5, 0, 1]]) + >>> Q = metrics.communicability_wei(A) + >>> Q + array([[0. , 0. , 1.93581903], + [0.07810379, 0. , 0.94712177], + [0.32263651, 0. , 0. ]]) + """ + # negative square root of nodal degrees + row_sum = adjacency.sum(1) + neg_sqrt = np.power(row_sum, -0.5) + square_sqrt = np.diag(neg_sqrt) + + # normalize input matrix + for_expm = square_sqrt @ adjacency @ square_sqrt + + # calculate matrix exponential of normalized matrix + cmc = scipy.sparse.linalg.expm(for_expm) + cmc[np.diag_indices_from(cmc)] = 0 + + return cmc + + + +def path_transitivity(D): + """ + Calculate path transitivity. + + This function implements path transitivity, calculating the density of + local detours (triangles) that are available along the shortest paths + between all pairs of nodes. + + This function is adapted and optimized from the Brain Connectivity Toolbox. + + .. warning:: + Test before use. + + Parameters + ---------- + D : (N, N) ndarray + Weight or connection length matrix. Length matrix is recommended and + transform should have been applied. + + Returns + ------- + T_mat : (N, N) ndarray + Path transitivity matrix + + References + ---------- + .. [1] Goñi, J., Van Den Heuvel, M. P., Avena-Koenigsberger, + A., Velez de Mendizabal, N., Betzel, R. F., Griffa, A., ... & + Sporns, O. (2014). Resting-brain functional connectivity predicted + by analytic measures of network communication. Proceedings of the + National Academy of Sciences, 111(2), 833-838. + """ + n = len(D) + m = np.zeros((n, n)) + T_mat = np.zeros((n, n)) + + deg_wu = np.sum(D, axis=0) + + for i in range(n - 1): + for j in range(i + 1, n): + sig_and = np.logical_and(D[i, :], D[j, :]) + m[i, j] = np.dot(D[i, :] + D[j, :], sig_and) \ + / (deg_wu[i] + deg_wu[j] - 2 * D[i, j]) + m += m.transpose() + + _, p_mat = distance_wei_floyd(D) + + for i in range(n - 1): + for j in range(i + 1, n): + path = retrieve_shortest_path(i, j, p_mat) + K = len(path) + T_mat[i, j] = 2 \ + * sum([m[i, j] for i, j in itertools.combinations(path, 2)]) \ + / (K * (K - 1)) + T_mat += T_mat.transpose() + + return T_mat + + def search_information(W, D, has_memory=False): """ Calculate search information. @@ -580,115 +541,6 @@ def search_information(W, D, has_memory=False): return SI -def path_transitivity(D): - """ - Calculate path transitivity. - - This function implements path transitivity, calculating the density of - local detours (triangles) that are available along the shortest paths - between all pairs of nodes. - - This function is adapted and optimized from the Brain Connectivity Toolbox. - - .. warning:: - Test before use. - - Parameters - ---------- - D : (N, N) ndarray - Weight or connection length matrix. Length matrix is recommended and - transform should have been applied. - - Returns - ------- - T_mat : (N, N) ndarray - Path transitivity matrix - - References - ---------- - .. [1] Goñi, J., Van Den Heuvel, M. P., Avena-Koenigsberger, - A., Velez de Mendizabal, N., Betzel, R. F., Griffa, A., ... & - Sporns, O. (2014). Resting-brain functional connectivity predicted - by analytic measures of network communication. Proceedings of the - National Academy of Sciences, 111(2), 833-838. 
- """ - n = len(D) - m = np.zeros((n, n)) - T_mat = np.zeros((n, n)) - - deg_wu = np.sum(D, axis=0) - - for i in range(n - 1): - for j in range(i + 1, n): - sig_and = np.logical_and(D[i, :], D[j, :]) - m[i, j] = np.dot(D[i, :] + D[j, :], sig_and) \ - / (deg_wu[i] + deg_wu[j] - 2 * D[i, j]) - m += m.transpose() - - _, p_mat = distance_wei_floyd(D) - - for i in range(n - 1): - for j in range(i + 1, n): - path = retrieve_shortest_path(i, j, p_mat) - K = len(path) - T_mat[i, j] = 2 \ - * sum([m[i, j] for i, j in itertools.combinations(path, 2)]) \ - / (K * (K - 1)) - T_mat += T_mat.transpose() - - return T_mat - - -def flow_graph(W, r=None, t=1): - """ - Calculate flow graph. - - This function implements flow graph, instantiates a continuous - time random walk on network. Waiting time for walkers at each - node are distributed as Poisson with rate parameter r. - This function returns the flow graph at time t. - - .. warning:: - Test before use. - - Parameters - ---------- - W : (N, N) ndarray - Symmetric adjacency matrix. - r : (N,) or (N, 1) ndarray, optional - Rate parameter. Will be set to np.ones((N, 1)) if not specified. - Default: None - t : int, optional - Markov time. Default: 1 - - Returns - ------- - dyn : (N, N) ndarray - flow graph at time T - - References - ---------- - .. [1] Lambiotte, R., Sinatra, R., Delvenne, J. C., Evans, T. S., - Barahona, M., & Latora, V. (2011). Flow graphs: Interweaving - dynamics and structure. Physical Review E, 84(1), 017102. - .. [2] https://github.com/brain-networks/local_scfc/blob/main/fcn/fcn_flow_graph.m - """ - if r is None: - r = np.ones((W.shape[0], 1)) - else: - if r.ndim == 1: - r = r[:, None] - deg_wu = np.sum(W, axis=0, keepdims=True) # (1, N) - deg_rate = np.sum(deg_wu / r, axis=0, keepdims=True) # (N, N) => (1, N) - ps = deg_wu / (deg_rate * r) # (1, N) / (N, N) => (N, N) - laplacian = np.diagflat(r) - np.multiply(np.divide(W, deg_wu), r) # elementwise - dyn = np.multiply( - deg_rate * scipy.sparse.linalg.expm(-t * laplacian), - ps - ) # elementwise - dyn = (dyn + dyn.T) / 2 - return dyn - def mean_first_passage_time(W, tol=1e-3): """ @@ -784,6 +636,8 @@ def diffusion_efficiency(W): return GE_diff, E_diff + + def resource_efficiency_bin(W_bin, lambda_prob=0.5): """ Calculate resource efficiency and shortest-path probability. @@ -824,7 +678,7 @@ def resource_efficiency_bin(W_bin, lambda_prob=0.5): morphospace of communication efficiency in complex networks. PLoS One, 8(3), e58070. """ - W_bin = _binarize(W_bin) + W_bin = _fast_binarize(W_bin) if not (0 < lambda_prob < 1): raise ValueError("lambda_prob must be between 0 and 1.") @@ -871,6 +725,64 @@ def resource_efficiency_bin(W_bin, lambda_prob=0.5): return E_res, prob_spl +def flow_graph(W, r=None, t=1): + """ + Calculate flow graph. + + This function implements flow graph, instantiates a continuous + time random walk on network. Waiting time for walkers at each + node are distributed as Poisson with rate parameter r. + This function returns the flow graph at time t. + + .. warning:: + Test before use. + + Parameters + ---------- + W : (N, N) ndarray + Symmetric adjacency matrix. + r : (N,) or (N, 1) ndarray, optional + Rate parameter. Will be set to np.ones((N, 1)) if not specified. + Default: None + t : int, optional + Markov time. Default: 1 + + Returns + ------- + dyn : (N, N) ndarray + flow graph at time T + + References + ---------- + .. [1] Lambiotte, R., Sinatra, R., Delvenne, J. C., Evans, T. S., + Barahona, M., & Latora, V. (2011). 
Flow graphs: Interweaving + dynamics and structure. Physical Review E, 84(1), 017102. + .. [2] https://github.com/brain-networks/local_scfc/blob/main/fcn/fcn_flow_graph.m + """ + if r is None: + r = np.ones((W.shape[0], 1)) + else: + if r.ndim == 1: + r = r[:, None] + deg_wu = np.sum(W, axis=0, keepdims=True) # (1, N) + deg_rate = np.sum(deg_wu / r, axis=0, keepdims=True) # (N, N) => (1, N) + ps = deg_wu / (deg_rate * r) # (1, N) / (N, N) => (N, N) + laplacian = np.diagflat(r) - np.multiply(np.divide(W, deg_wu), r) # elementwise + dyn = np.multiply( + deg_rate * scipy.sparse.linalg.expm(-t * laplacian), + ps + ) # elementwise + dyn = (dyn + dyn.T) / 2 + return dyn + + + +def assortativity(W, r=None): + """Calculate assortativity.""" + pass + + + def matching_ind_und(W): """ Calculate undirected matching index. @@ -928,37 +840,83 @@ def matching_ind_und(W): return M0 -def _graph_laplacian(W): - r""" - Compute the graph Laplacian of a weighted adjacency matrix. - - Graph Laplacian is defined as the degree matrix minus the adjacency - matrix :math:`L = D - W`, where :math:`D` is the degree matrix and - is defined as :math:`D_{ii} = \sum_j W_{ij}`. - - The graph Laplacian matrix :math:`L` has the form of - - .. math:: - L = \begin{bmatrix} - d_1 & -w_{12} & \cdots & -w_{1n} \\ - -w_{21} & d_2 & \cdots & -w_{2n} \\ - \vdots & \vdots & \ddots & \vdots \\ - -w_{n1} & -w_{n2} & \cdots & d_n - \end{bmatrix} +def rich_feeder_peripheral(x, sc, stat='median'): + """ + Calculate connectivity values in rich, feeder, and peripheral edges. Parameters ---------- - W : (N, N) array_like - Weighted, directed/undirected connection weight/length array + x : (N, N) numpy.ndarray + Symmetric correlation or connectivity matrix + sc : (N, N) numpy.ndarray + Binary structural connectivity matrix + stat : {'mean', 'median'}, optional + Statistic to use over rich/feeder/peripheral links. Default: 'median' Returns ------- - L : (N, N) numpy.ndarray - Graph Laplacian of `W` + rfp : (3, k) numpy.ndarray + Array of median rich (0), feeder (1), and peripheral (2) + values, defined by `x`. `k` is the maximum degree defined on `sc`. + pvals : (3, k) numpy.ndarray + p-value for each link, computed using Welch's t-test. + Rich links are compared against non-rich links. Feeder links are + compared against peripheral links. Peripheral links are compared + against feeder links. T-test is one-sided. + + Notes + ----- + This code was written by Justine Hansen who promises to fix and even + optimize the code should any issues arise, provided you let her know. 
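`flow_graph` above carries a "test before use" warning; a quick smoke test along the lines below may be useful before relying on it (the network size and Markov time are arbitrary choices, not values from this patch):

    import numpy as np
    from netneurotools import metrics

    rs = np.random.RandomState(1234)
    W = rs.rand(30, 30)
    W = (W + W.T) / 2
    np.fill_diagonal(W, 0)            # symmetric adjacency, no self-loops

    dyn = metrics.flow_graph(W, t=1)  # unit Poisson rates, since r is not given
    # dyn is symmetric and has the same shape as W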
""" - D = np.diag(np.sum(W, axis=0)) - return D - W + stats = ['mean', 'median'] + if stat not in stats: + raise ValueError(f'Provided stat {stat} not valid.\ + Must be one of {stats}') + nnodes = len(sc) + mask = np.triu(np.ones(nnodes), 1) > 0 + node_degree = degrees_und(sc) + k = np.max(node_degree).astype(np.int64) + rfp_label = np.zeros((len(sc[mask]), k)) -if use_numba: - _graph_laplacian = njit(_graph_laplacian) # ("float64[:,::1](float64[:,::1])") + for degthresh in range(k): # for each degree threshold + hub_idx = np.where(node_degree >= degthresh) # find the hubs + hub = np.zeros([nnodes, 1]) + hub[hub_idx, :] = 1 + + rfp = np.zeros([nnodes, nnodes]) # for each link, define rfp + for edge1 in range(nnodes): + for edge2 in range(nnodes): + if hub[edge1] + hub[edge2] == 2: + rfp[edge1, edge2] = 1 # rich + if hub[edge1] + hub[edge2] == 1: + rfp[edge1, edge2] = 2 # feeder + if hub[edge1] + hub[edge2] == 0: + rfp[edge1, edge2] = 3 # peripheral + rfp_label[:, degthresh] = rfp[mask] + + rfp = np.zeros([3, k]) + pvals = np.zeros([3, k]) + for degthresh in range(k): + + redfunc = np.median if stat == 'median' else np.mean + for linktype in range(3): + rfp[linktype, degthresh] = redfunc(x[mask][rfp_label[:, degthresh] + == linktype + 1]) + + # p-value (one-sided Welch's t-test) + _, pvals[0, degthresh] = ttest_ind( + x[mask][rfp_label[:, degthresh] == 1], + x[mask][rfp_label[:, degthresh] != 1], + equal_var=False, alternative='greater') + _, pvals[1, degthresh] = ttest_ind( + x[mask][rfp_label[:, degthresh] == 2], + x[mask][rfp_label[:, degthresh] == 3], + equal_var=False, alternative='greater') + _, pvals[2, degthresh] = ttest_ind( + x[mask][rfp_label[:, degthresh] == 3], + x[mask][rfp_label[:, degthresh] == 2], + equal_var=False, alternative='greater') + + return rfp, pvals diff --git a/netneurotools/metrics/communication.py b/netneurotools/metrics/communication.py new file mode 100644 index 0000000..6d126b9 --- /dev/null +++ b/netneurotools/metrics/communication.py @@ -0,0 +1 @@ +"""Functions for calculating network communication metrics.""" diff --git a/netneurotools/metrics/control.py b/netneurotools/metrics/control.py new file mode 100644 index 0000000..b1e4b1c --- /dev/null +++ b/netneurotools/metrics/control.py @@ -0,0 +1 @@ +"""Functions for calculating network control metrics.""" diff --git a/netneurotools/metrics/metrics_utils.py b/netneurotools/metrics/metrics_utils.py new file mode 100644 index 0000000..908d66a --- /dev/null +++ b/netneurotools/metrics/metrics_utils.py @@ -0,0 +1,65 @@ +"""Functions for supporting network metrics.""" + +import numpy as np + +try: + from numba import njit + use_numba = True +except ImportError: + use_numba = False + +def _fast_binarize(W): + """ + Binarize a matrix. + + Parameters + ---------- + W : (N, N) array_like + Matrix to be binarized + + Returns + ------- + binarized : (N, N) numpy.ndarray + Binarized matrix + """ + return (W > 0) * 1 + + +if use_numba: + _fast_binarize = njit(_fast_binarize) + + +def _graph_laplacian(W): + r""" + Compute the graph Laplacian of a weighted adjacency matrix. + + Graph Laplacian is defined as the degree matrix minus the adjacency + matrix :math:`L = D - W`, where :math:`D` is the degree matrix and + is defined as :math:`D_{ii} = \sum_j W_{ij}`. + + The graph Laplacian matrix :math:`L` has the form of + + .. 
math:: + L = \begin{bmatrix} + d_1 & -w_{12} & \cdots & -w_{1n} \\ + -w_{21} & d_2 & \cdots & -w_{2n} \\ + \vdots & \vdots & \ddots & \vdots \\ + -w_{n1} & -w_{n2} & \cdots & d_n + \end{bmatrix} + + Parameters + ---------- + W : (N, N) array_like + Weighted, directed/undirected connection weight/length array + + Returns + ------- + L : (N, N) numpy.ndarray + Graph Laplacian of `W` + """ + D = np.diag(np.sum(W, axis=0)) + return D - W + + +if use_numba: + _graph_laplacian = njit(_graph_laplacian) # ("float64[:,::1](float64[:,::1])") \ No newline at end of file diff --git a/netneurotools/metrics/spreading.py b/netneurotools/metrics/spreading.py new file mode 100644 index 0000000..b91f7bf --- /dev/null +++ b/netneurotools/metrics/spreading.py @@ -0,0 +1,6 @@ +"""Functions for calculating network spreading models.""" + + +def simulate_atrophy(): + """Function to simulate atrophy in a network.""" + pass diff --git a/netneurotools/metrics/statistical.py b/netneurotools/metrics/statistical.py new file mode 100644 index 0000000..ea5e945 --- /dev/null +++ b/netneurotools/metrics/statistical.py @@ -0,0 +1,660 @@ +"""Functions for calculating statistical network metrics.""" + +import numpy as np + +try: + from numba import njit + use_numba = True +except ImportError: + use_numba = False + +from .metrics_utils import _graph_laplacian + +def network_pearsonr(annot1, annot2, weight): + r""" + Calculate pearson correlation between two annotation vectors. + + .. warning:: + Test before use. + + Parameters + ---------- + annot1 : (N,) array_like + First annotation vector, demean will be applied. + annot2 : (N,) array_like + Second annotation vector, demean will be applied. + weight : (N, N) array_like + Weight matrix. Diagonal elements should be 1. + + Returns + ------- + corr : float + Network correlation between `annot1` and `annot2` + + Notes + ----- + If Pearson correlation is represented as + + .. math:: + \rho_{x,y} = \dfrac{ + \mathrm{sum}(I \times (\hat{x} \otimes \hat{y})) + }{ + \sigma_x \sigma_y + } + + The network correlation is defined analogously as + + .. math:: + \rho_{x,y,G} = \dfrac{ + \mathrm{sum}(W \times (\hat{x} \otimes \hat{y})) + }{ + \sigma_{x,W} \sigma_{y,W} + } + + where :math:`\hat{x}` and :math:`\hat{y}` are the demeaned annotation vectors, + + The weight matrix :math:`W` is used to represent the network structure. + It is usually in the form of :math:`W = \\exp(-kL)` where :math:`L` is the + length matrix and :math:`k` is a decay parameter. + + Example using shortest path length as weight + + .. code:: python + + spl, _ = distance_wei_floyd(D) # input should be distance matrix + spl_wei = 1 / np.exp(spl) + netcorr = network_pearsonr(annot1, annot2, spl_wei) + + Example using (inverse) effective resistance as weight + + .. code:: python + + R_eff = effective_resistance(W) + R_eff_norm = R_eff / np.max(R_eff) + W = 1 / R_eff_norm + W = W / np.max(W) + np.fill_diagonal(W, 1.0) + netcorr = network_pearsonr(annot1, annot2, W) + + References + ---------- + .. [1] Coscia, M. (2021). Pearson correlations on complex networks. + Journal of Complex Networks, 9(6), cnab036. 
+ https://doi.org/10.1093/comnet/cnab036 + + + See Also + -------- + netneurotools.stats.network_pearsonr_pairwise + """ + annot1 = annot1 - np.mean(annot1) + annot2 = annot2 - np.mean(annot2) + upper = np.sum(np.multiply(weight, np.outer(annot1, annot2))) + lower1 = np.sum(np.multiply(weight, np.outer(annot1, annot1))) + lower2 = np.sum(np.multiply(weight, np.outer(annot2, annot2))) + return upper / np.sqrt(lower1) / np.sqrt(lower2) + + +def network_pearsonr_numba(annot1, annot2, weight): + """ + Numba version of :meth:`netneurotools.stats.network_pearsonr`. + + .. warning:: + Test before use. + + Parameters + ---------- + annot1 : (N,) array_like + First annotation vector, demean will be applied. + annot2 : (N,) array_like + Second annotation vector, demean will be applied. + weight : (N, N) array_like + Weight matrix. Diagonal elements should be 1. + + Returns + ------- + corr : float + Network correlation between `annot1` and `annot2` + """ + n = annot1.shape[0] + annot1 = annot1 - np.mean(annot1) + annot2 = annot2 - np.mean(annot2) + upper, lower1, lower2 = 0.0, 0.0, 0.0 + for i in range(n): + for j in range(n): + upper += annot1[i] * annot2[j] * weight[i, j] + lower1 += annot1[i] * annot1[j] * weight[i, j] + lower2 += annot2[i] * annot2[j] * weight[i, j] + return upper / np.sqrt(lower1) / np.sqrt(lower2) + + +if use_numba: + network_pearsonr_numba = njit(network_pearsonr_numba) + + +def _cross_outer(annot_mat): + """ + Calculate cross outer product of input matrix. + + This functions is only used in `network_pearsonr_pairwise`. + + Parameters + ---------- + annot_mat : (N, D) array_like + Input matrix + + Returns + ------- + cross_outer : (N, N, D, D) numpy.ndarray + Cross outer product of `annot_mat` + """ + n_samp, n_feat = annot_mat.shape + cross_outer = np.empty((n_samp, n_samp, n_feat, n_feat), annot_mat.dtype) + for a in range(n_samp): + for b in range(n_samp): + for c in range(n_feat): + for d in range(n_feat): + cross_outer[a, b, c, d] = annot_mat[a, c] * annot_mat[b, d] + return cross_outer + + +if use_numba: + # ("float64[:,:,:,::1](float64[:,::1])") + _cross_outer = njit(_cross_outer) + + +def _multiply_sum(cross_outer, weight): + """ + Multiply and sum cross outer product. + + This functions is only used in `network_pearsonr_pairwise`. + + Parameters + ---------- + cross_outer : (N, N, D, D) array_like + Cross outer product of `annot_mat` + weight : (D, D) array_like + Weight matrix + + Returns + ------- + cross_outer_after : (N, N) numpy.ndarray + Result of multiplying and summing `cross_outer` + """ + n_samp, _, n_dim, _ = cross_outer.shape + cross_outer_after = np.empty((n_samp, n_samp), cross_outer.dtype) + for i in range(n_samp): + for j in range(n_samp): + curr_sum = 0.0 + for k in range(n_dim): + for l in range(n_dim): # noqa: E741 + curr_sum += weight[k, l] * cross_outer[i, j, k, l] + cross_outer_after[i, j] = curr_sum + return cross_outer_after + + +if use_numba: + # ("float64[:,::1](float64[:,:,:,::1],float64[:,::1])") + _multiply_sum = njit(_multiply_sum) + + +def network_pearsonr_pairwise(annot_mat, weight): + """ + Calculate pairwise network correlation between rows of `annot_mat`. + + .. warning:: + Test before use. + + Parameters + ---------- + annot_mat : (N, D) array_like + Input matrix + weight : (D, D) array_like + Weight matrix. Diagonal elements should be 1. 
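One way to sanity-check the pairwise variant documented here is to pass an identity weight matrix, which reduces the network correlation to the ordinary Pearson correlation (sizes and seed below are arbitrary):

    import numpy as np
    from netneurotools import metrics

    rs = np.random.RandomState(1234)
    annot_mat = rs.rand(5, 30)   # five annotation vectors over 30 nodes
    weight = np.eye(30)          # identity weight: no network structure

    corr_mat = metrics.network_pearsonr_pairwise(annot_mat, weight)
    # corr_mat is (5, 5) and should agree with np.corrcoef(annot_mat)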
+ + Returns + ------- + corr_mat : (N, N) numpy.ndarray + Pairwise network correlation matrix + + Notes + ----- + This is a faster version of :meth:`netneurotools.stats.network_pearsonr` + for calculating pairwise network correlation between rows of `annot_mat`. + Check :meth:`netneurotools.stats.network_pearsonr` for details. + + See Also + -------- + netneurotools.stats.network_pearsonr + """ + annot_mat_demean = annot_mat - np.mean(annot_mat, axis=1, keepdims=True) + if use_numba: + cross_outer = _cross_outer(annot_mat_demean) + cross_outer_after = _multiply_sum(cross_outer, weight) + else: + # https://stackoverflow.com/questions/24839481/python-matrix-outer-product + cross_outer = np.einsum('ac,bd->abcd', annot_mat_demean, annot_mat_demean) + cross_outer_after = np.sum(np.multiply(cross_outer, weight), axis=(2, 3)) + # translating the two lines below in numba does not speed up much + lower = np.sqrt(np.diagonal(cross_outer_after)) + return cross_outer_after / np.einsum('i,j', lower, lower) + + +def _onehot_quadratic_form_broadcast(Q_star): + """ + Calculate one-hot quadratic form of input matrix. + + This functions is only used in `effective_resistance`. + + Parameters + ---------- + Q_star : (N, N) array_like + Input matrix + + Returns + ------- + R_eff : (N, N) numpy.ndarray + One-hot quadratic form of `Q_star` + """ + n = Q_star.shape[0] + R_eff = np.empty((n, n), Q_star.dtype) + for i in range(n): + for j in range(n): + R_eff[i, j] = Q_star[i, i] - Q_star[j, i] - Q_star[i, j] + Q_star[j, j] + return R_eff + + +if use_numba: + # ("float64[:,::1](float64[:,::1])") + _onehot_quadratic_form_broadcast = njit(_onehot_quadratic_form_broadcast) + + +def effective_resistance(W, directed=True): + """ + Calculate effective resistance matrix. + + The effective resistance between two nodes in a graph, often used in the context + of electrical networks, is a measure that stems from the inverse of the Laplacian + matrix of the graph. + + .. warning:: + Test before use. + + Parameters + ---------- + W : (N, N) array_like + Weight matrix. + directed : bool, optional + Whether the graph is directed. This is used to determine whether to turn on + the :code:`hermitian=True` option in :func:`numpy.linalg.pinv`. When you are + using a symmetric weight matrix (while real-valued implying hermitian), you + can set this to False for better performance. Default: True + + Returns + ------- + R_eff : (N, N) numpy.ndarray + Effective resistance matrix + + Notes + ----- + The effective resistance between two nodes :math:`i` and :math:`j` is defined as + + .. math:: + R_{ij} = (e_i - e_j)^T Q^* (e_i - e_j) + + where :math:`Q^*` is the Moore-Penrose pseudoinverse of the Laplacian matrix + :math:`L` of the graph, and :math:`e_i` is the :math:`i`-th standard basis vector. + + References + ---------- + .. [1] Ellens, W., Spieksma, F. M., Van Mieghem, P., Jamakovic, A., & Kooij, + R. E. (2011). Effective graph resistance. Linear Algebra and Its Applications, + 435(10), 2491–2506. https://doi.org/10.1016/j.laa.2011.02.024 + + See Also + -------- + netneurotools.stats.network_polarisation + """ + L = _graph_laplacian(W) + Q_star = np.linalg.pinv(L, hermitian=not directed) + if use_numba: + R_eff = _onehot_quadratic_form_broadcast(Q_star) + else: + Q_star_diag = np.diag(Q_star) + R_eff = \ + Q_star_diag[:, np.newaxis] \ + - Q_star \ + - Q_star.T \ + + Q_star_diag[np.newaxis, :] + return R_eff + + +def _polariz_diff(vec): + """ + Calculate difference between positive and negative parts of a vector. 
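`effective_resistance`, defined above, also warns "test before use". A small symmetric example (so `directed=False` is safe, as its docstring notes); matrix size and seed are arbitrary:

    import numpy as np
    from netneurotools import metrics

    rs = np.random.RandomState(1234)
    W = rs.rand(10, 10)
    W = (W + W.T) / 2
    np.fill_diagonal(W, 0)   # symmetric weight matrix

    R_eff = metrics.effective_resistance(W, directed=False)
    # R_eff is symmetric with a zero diagonal; its normalized inverse can be
    # fed to network_pearsonr as a weight matrix, as shown earlier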
+ + This functions is only used in `network_polarisation`. + + Parameters + ---------- + vec : (N,) array_like + Input vector. Must have both positive and negative values. + + Returns + ------- + vec_diff : (N,) numpy.ndarray + Difference between positive and negative parts of `vec` + """ + # + vec_pos = np.maximum(vec, 0.0) + vec_pos /= np.max(vec_pos) + # + vec_neg = np.minimum(vec, 0.0) + vec_neg = np.abs(vec_neg) + vec_neg /= np.max(vec_neg) + return (vec_pos - vec_neg) + + +if use_numba: + _polariz_diff = njit(_polariz_diff) + + +def _quadratic_form(W, vec_left, vec_right, squared=False): + """ + Calculate quadratic form :math:`v_{left}^T W v_{right}`. + + Parameters + ---------- + W : (N, N) array_like + Input matrix. + vec_left : (N,) array_like + Left weight vector. + vec_right : (N,) array_like + Right weight vector. + squared : bool, optional + Whether to square the input weight matrix. Default: False + + Returns + ------- + quadratic_form : float + Quadratic form from `W`, `vec_left`, and `vec_right` + """ + # [numpy] + + # (vec_left.T @ W @ vec_right)[0, 0] + # [numba] + # vec = np.ascontiguousarray(vec[np.newaxis, :]) + n = W.shape[0] + ret = 0.0 + for i in range(n): + for j in range(n): + if squared: + ret += vec_left[i] * vec_right[j] * W[i, j]**2 + else: + ret += vec_left[i] * vec_right[j] * W[i, j] + return ret + + +if use_numba: + _quadratic_form = njit(_quadratic_form) + + +def network_polarisation(vec, W, directed=True): + r""" + Calculate polarisation of a vector on a graph. + + Network polarisation is a measure of polizzartion taken into account all the + three factors below [1]_: + + - how extreme the opinions of the people are + - how much they organize into echo chambers, and + - how these echo chambers organize in the network + + .. warning:: + Test before use. + + Parameters + ---------- + vec : (N,) array_like + Polarization vector. Must have both positive and negative values. Will be + normalized between -1 and 1 internally. + W : (N, N) array_like + Weight matrix. + directed : bool, optional + Whether the graph is directed. This is used to determine whether to turn on + the :code:`hermitian=True` option in :func:`numpy.linalg.pinv`. When you are + using a symmetric weight matrix (while real-valued implying hermitian), you + can set this to False for better performance. Default: True + + Returns + ------- + polariz : float + Polarization of `vec` on `W` + + Notes + ----- + The measure is based on the genralized Eucledian distance, defined as + + .. math:: + \delta_{G, o} = \sqrt{(o^+ - o^-)^T Q^* (o^+ - o^-)} + + where :math:`o^+` and :math:`o^-` are the positive and negative parts of the + polarization vector, and :math:`Q^*` is the Moore-Penrose pseudoinverse + of the Laplacian matrix :math:`L` of the graph. Check :func:`effective_resistance` + for similarity. + + References + ---------- + .. [1] Hohmann, M., Devriendt, K., & Coscia, M. (2023). Quantifying ideological + polarization on a network using generalized Euclidean distance. Science Advances, + 9(9), eabq2044. https://doi.org/10.1126/sciadv.abq2044 + + See Also + -------- + netneurotools.stats.effective_resistance + """ + L = _graph_laplacian(W) + Q_star = np.linalg.pinv(L, hermitian=not directed) + diff = _polariz_diff(vec) + if use_numba: + polariz_sq = _quadratic_form(Q_star, diff, diff, squared=False) + else: + polariz_sq = (diff.T @ Q_star @ diff) + return np.sqrt(polariz_sq) + + +def network_variance(vec, D): + r""" + Calculate variance of a vector on a graph. 
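`network_polarisation` above requires an opinion vector containing both positive and negative entries. A hedged sketch on a random symmetric network (sizes and seed are arbitrary; the `network_variance` docstring continues below):

    import numpy as np
    from netneurotools import metrics

    rs = np.random.RandomState(1234)
    W = rs.rand(20, 20)
    W = (W + W.T) / 2
    np.fill_diagonal(W, 0)                  # symmetric weight matrix

    opinions = rs.uniform(-1, 1, size=20)   # both signs present, as required
    polariz = metrics.network_polarisation(opinions, W, directed=False)
    # non-negative scalar; larger values mean the opposing camps sit farther apart on W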
+ + Network variance is a measure of variance taken into account the network + structure. + + .. warning:: + Test before use. + + Parameters + ---------- + vec : (N,) array_like + Input vector. Must be all positive. + Will be normalized internally as a probability distribution. + D : (N, N) array_like + Distance matrix. + + Returns + ------- + network_variance : float + Network variance of `vec` on `D` + + Notes + ----- + The network variance is defined as + + .. math:: + var(p) = \frac{1}{2} \sum_{i, j} p(i) p(j) d^2(i,j) + + where :math:`p` is the probability distribution of `vec`, and :math:`d(i,j)` + is the distance between node :math:`i` and :math:`j`. + + The distance matrix :math:`D` can make use of effective resistance or its + square root. + + Example using effective resistance as weight matrix + + .. code:: python + + R_eff = effective_resistance(W) + netvar = network_variance(vec, R_eff) + + References + ---------- + .. [1] Devriendt, K., Martin-Gutierrez, S., & Lambiotte, R. (2022). + Variance and covariance of distributions on graphs. SIAM Review, 64(2), + 343–359. https://doi.org/10.1137/20M1361328 + + See Also + -------- + netneurotools.stats.network_covariance + """ + p = vec / np.sum(vec) + return 0.5 * (p.T @ np.multiply(D, D) @ p) + + +def network_variance_numba(vec, D): + """ + Numba version of :meth:`netneurotools.stats.network_variance`. + + Network variance is a measure of variance taken into account the network + structure. + + .. warning:: + Test before use. + + Parameters + ---------- + vec : (N,) array_like + Input vector. Must be all positive. + Will be normalized internally as a probability distribution. + D : (N, N) array_like + Distance matrix. + + Returns + ------- + network_variance : float + Network variance of `vec` on `D` + """ + p = vec / np.sum(vec) + return 0.5 * _quadratic_form(D, p, p, squared=True) + + +if use_numba: + network_variance_numba = njit(network_variance_numba) + + +def network_covariance(joint_pmat, D, calc_marginal=True): + r""" + Calculate covariance of a joint probability matrix on a graph. + + .. warning:: + Test before use. + + Parameters + ---------- + joint_pmat : (N, N) array_like + Joint probability matrix. Please make sure that it is valid. + D : (N, N) array_like + Distance matrix. + calc_marginal : bool, optional + Whether to calculate marginal variance. It will be marginally faster if + :code:`calc_marginal=False` (returning marginal variances as 0). Default: True + + Returns + ------- + network_covariance : float + Covariance of `joint_pmat` on `D` + var_p : float + Marginal variance of `joint_pmat` on `D`. + Will be 0 if :code:`calc_marginal=False` + var_q : float + Marginal variance of `joint_pmat` on `D`. + Will be 0 if :code:`calc_marginal=False` + + Notes + ----- + The network variance is defined as + + .. math:: + cov(P) = \frac{1}{2} \sum_{i, j} [p(i) q(j) - P(i,j)] d^2(i,j) + + where :math:`P` is the joint probability matrix, :math:`p` and :math:`q` + are the marginal probability distributions of `joint_pmat`, and :math:`d(i,j)` + is the distance between node :math:`i` and :math:`j`. + + Check :func:`network_variance` for usage. + + References + ---------- + .. [1] Devriendt, K., Martin-Gutierrez, S., & Lambiotte, R. (2022). + Variance and covariance of distributions on graphs. SIAM Review, 64(2), + 343–359. 
https://doi.org/10.1137/20M1361328 + + See Also + -------- + netneurotools.stats.network_variance + """ + p = np.sum(joint_pmat, axis=1) + q = np.sum(joint_pmat, axis=0) + D_sq = np.multiply(D, D) + cov = p.T @ D_sq @ q - np.sum(np.multiply(joint_pmat, D_sq)) + if calc_marginal: + var_p = p.T @ D_sq @ p + var_q = q.T @ D_sq @ q + else: + var_p, var_q = 0, 0 + return 0.5 * cov, 0.5 * var_p, 0.5 * var_q + + +def network_covariance_numba(joint_pmat, D, calc_marginal=True): + """ + Numba version of :meth:`netneurotools.stats.network_covariance`. + + .. warning:: + Test before use. + + Parameters + ---------- + joint_pmat : (N, N) array_like + Joint probability matrix. Please make sure that it is valid. + D : (N, N) array_like + Distance matrix. + calc_marginal : bool, optional + Whether to calculate marginal variance. It will be marginally faster if + :code:`calc_marginal=False` (returning marginal variances as 0). Default: True + + Returns + ------- + network_covariance : float + Covariance of `joint_pmat` on `D` + var_p : float + Marginal variance of `joint_pmat` on `D`. + Will be 0 if :code:`calc_marginal=False` + var_q : float + Marginal variance of `joint_pmat` on `D`. + Will be 0 if :code:`calc_marginal=False` + """ + n = joint_pmat.shape[0] + p = np.sum(joint_pmat, axis=1) + q = np.sum(joint_pmat, axis=0) + cov = 0.0 + var_p, var_q = 0.0, 0.0 + for i in range(n): + for j in range(n): + cov += (p[i] * q[j] - joint_pmat[i, j]) * D[i, j]**2 + if calc_marginal: + var_p += p[i] * p[j] * D[i, j]**2 + var_q += q[i] * q[j] * D[i, j]**2 + return 0.5 * cov, 0.5 * var_p, 0.5 * var_q + + +if use_numba: + network_covariance_numba = njit(network_covariance_numba) diff --git a/netneurotools/metrics/tests/__init__.py b/netneurotools/metrics/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/netneurotools/tests/test_metrics.py b/netneurotools/metrics/tests/test_bct.py similarity index 72% rename from netneurotools/tests/test_metrics.py rename to netneurotools/metrics/tests/test_bct.py index 253da0f..f83ab6a 100644 --- a/netneurotools/tests/test_metrics.py +++ b/netneurotools/metrics/tests/test_bct.py @@ -1,15 +1,15 @@ -# -*- coding: utf-8 -*- -"""For testing netneurotools.metrics functionality.""" +"""For testing netneurotools.metrics.bct functionality.""" -import numpy as np import pytest +import numpy as np from netneurotools import metrics rs = np.random.RandomState(1234) -def test_communicability(): +def test_communicability_bin(): + """Test communicability_bin function.""" comm = metrics.communicability_bin(rs.choice([0, 1], size=(100, 100))) assert comm.shape == (100, 100) @@ -18,6 +18,7 @@ def test_communicability(): def test_communicability_wei(): + """Test communicability_wei function.""" comm = metrics.communicability_wei(rs.rand(100, 100)) assert comm.shape == (100, 100) assert np.allclose(np.diag(comm), 0) diff --git a/netneurotools/metrics/tests/test_communication.py b/netneurotools/metrics/tests/test_communication.py new file mode 100644 index 0000000..dd066f8 --- /dev/null +++ b/netneurotools/metrics/tests/test_communication.py @@ -0,0 +1 @@ +"""For testing netneurotools.metrics.communication functionality.""" diff --git a/netneurotools/metrics/tests/test_control.py b/netneurotools/metrics/tests/test_control.py new file mode 100644 index 0000000..28ad7c2 --- /dev/null +++ b/netneurotools/metrics/tests/test_control.py @@ -0,0 +1 @@ +"""For testing netneurotools.metrics.control functionality.""" diff --git a/netneurotools/metrics/tests/test_spreading.py 
b/netneurotools/metrics/tests/test_spreading.py new file mode 100644 index 0000000..216c638 --- /dev/null +++ b/netneurotools/metrics/tests/test_spreading.py @@ -0,0 +1 @@ +"""For testing netneurotools.metrics.spreading functionality.""" diff --git a/netneurotools/metrics/tests/test_statistical.py b/netneurotools/metrics/tests/test_statistical.py new file mode 100644 index 0000000..73caf2d --- /dev/null +++ b/netneurotools/metrics/tests/test_statistical.py @@ -0,0 +1 @@ +"""For testing netneurotools.metrics.statistical functionality.""" diff --git a/netneurotools/modularity.py b/netneurotools/modularity.py deleted file mode 100644 index 1831dd9..0000000 --- a/netneurotools/modularity.py +++ /dev/null @@ -1,316 +0,0 @@ -# -*- coding: utf-8 -*- -"""Functions for working with network modularity.""" - -import bct -import numpy as np -from sklearn.utils.validation import check_random_state -from . import cluster - -try: - from numba import njit, prange - use_numba = True -except ImportError: - prange = range - use_numba = False - - -def consensus_modularity(adjacency, gamma=1, B='modularity', - repeats=250, null_func=np.mean, seed=None): - """ - Find community assignments from `adjacency` through consensus. - - Performs `repeats` iterations of community detection on `adjacency` and - then uses consensus clustering on the resulting community assignments. - - Parameters - ---------- - adjacency : (N, N) array_like - Adjacency matrix (weighted/non-weighted) on which to perform consensus - community detection. - gamma : float, optional - Resolution parameter for modularity maximization. Default: 1 - B : str or (N, N) array_like, optional - Null model to use for consensus clustering. If `str`, must be one of - ['modularity', 'potts', 'negative_sym', 'negative_asym']. Default: - 'modularity' - repeats : int, optional - Number of times to repeat Louvain algorithm clustering. Default: 250 - null_func : callable, optional - Function used to generate null model when performing consensus-based - clustering. Must accept a 2D array as input and return a single value. - Default: `np.mean` - seed : {int, np.random.RandomState instance, None}, optional - Seed for random number generation. Default: None - - Returns - ------- - consensus : (N,) np.ndarray - Consensus-derived community assignments - Q_all : array_like - Optimized modularity over all `repeats` community assignments - zrand_all : array_like - z-Rand score over all pairs of `repeats` community assignment vectors - - References - ---------- - Bassett, D. S., Porter, M. A., Wymbs, N. F., Grafton, S. T., Carlson, - J. M., & Mucha, P. J. (2013). Robust detection of dynamic community - structure in networks. Chaos: An Interdisciplinary Journal of Nonlinear - Science, 23(1), 013142. - """ - # generate community partitions `repeat` times - comms, Q_all = zip(*[bct.community_louvain(adjacency, gamma=gamma, B=B) - for i in range(repeats)]) - comms = np.column_stack(comms) - - # find consensus cluster assignments across all partitoning solutions - consensus = cluster.find_consensus(comms, null_func=null_func, seed=seed) - - # get z-rand statistics for partition similarity (n.b. can take a while) - zrand_all = _zrand_partitions(comms) - - return consensus, np.array(Q_all), zrand_all - - -def _dummyvar(labels): - """ - Generate dummy-coded array from provided community assignment `labels`. 
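The network variance and covariance helpers added above (in ``netneurotools/metrics/statistical.py``) only hint at usage; a hedged, self-contained sketch with random placeholder data follows. The import path is an assumption, since the docstrings still point at ``netneurotools.stats``:

.. code:: python

    import numpy as np

    # assumed re-export; in this patch the functions live in
    # netneurotools/metrics/statistical.py
    from netneurotools.metrics import network_variance, network_covariance

    rng = np.random.default_rng(1234)
    n = 10

    # symmetric "distance" matrix with zero diagonal (stand-in for, e.g.,
    # effective resistance or shortest-path length)
    D = rng.random((n, n))
    D = np.triu(D, k=1)
    D = D + D.T

    vec = rng.random(n)              # positive weights; normalized internally
    print(network_variance(vec, D))

    P = rng.random((n, n))
    P /= P.sum()                     # crude but valid joint probability matrix
    cov, var_p, var_q = network_covariance(P, D)
    print(cov, var_p, var_q)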
- - Parameters - ---------- - labels : (N,) array_like - Labels assigning `N` samples to `G` groups - - Returns - ------- - ci : (N, G) numpy.ndarray - Dummy-coded array where 1 indicates that a sample belongs to a group - """ - comms = np.unique(labels) - - ci = np.zeros((len(labels), len(comms))) - for n, grp in enumerate(comms): - ci[:, n] = labels == grp - - return ci - - -def zrand(X, Y): - """ - Calculate the z-Rand index of two community assignments. - - Parameters - ---------- - X, Y : (n, 1) array_like - Community assignment vectors to compare - - Returns - ------- - z_rand : float - Z-rand index - - References - ---------- - Amanda L. Traud, Eric D. Kelsic, Peter J. Mucha, and Mason A. Porter. - (2011). Comparing Community Structure to Characteristics in Online - Collegiate Social Networks. SIAM Review, 53, 526-543. - """ - if X.ndim > 1 or Y.ndim > 1: - if X.shape[-1] > 1 or Y.shape[-1] > 1: - raise ValueError('X and Y must have only one-dimension each. ' - 'Please check inputs.') - - Xf = X.flatten() - Yf = Y.flatten() - - n = len(Xf) - indx, indy = _dummyvar(Xf), _dummyvar(Yf) - Xa = indx.dot(indx.T) - Ya = indy.dot(indy.T) - - M = n * (n - 1) / 2 - M1 = Xa.nonzero()[0].size / 2 - M2 = Ya.nonzero()[0].size / 2 - - wab = np.logical_and(Xa, Ya).nonzero()[0].size / 2 - - mod = n * (n**2 - 3 * n - 2) - C1 = mod - (8 * (n + 1) * M1) + (4 * np.power(indx.sum(0), 3).sum()) - C2 = mod - (8 * (n + 1) * M2) + (4 * np.power(indy.sum(0), 3).sum()) - - a = M / 16 - b = ((4 * M1 - 2 * M)**2) * ((4 * M2 - 2 * M)**2) / (256 * (M**2)) - c = C1 * C2 / (16 * n * (n - 1) * (n - 2)) - d = ((((4 * M1 - 2 * M)**2) - (4 * C1) - (4 * M)) - * (((4 * M2 - 2 * M)**2) - (4 * C2) - (4 * M)) - / (64 * n * (n - 1) * (n - 2) * (n - 3))) - - sigw2 = a - b + c + d - # catch any negatives - if sigw2 < 0: - return 0 - z_rand = (wab - ((M1 * M2) / M)) / np.sqrt(sigw2) - - return z_rand - - -def _zrand_partitions(communities): - """ - Calculate z-Rand for all pairs of assignments in `communities`. - - Iterates through every pair of community assignment vectors in - `communities` and calculates the z-Rand score to assess their similarity. - - Parameters - ---------- - communities : (S, R) array_like - Community assignments for `S` samples over `R` partitions - - Returns - ------- - all_zrand : array_like - z-Rand score over all pairs of `R` partitions of community assignments - """ - n_partitions = communities.shape[-1] - all_zrand = np.zeros(int(n_partitions * (n_partitions - 1) / 2)) - - for c1 in prange(n_partitions): - for c2 in prange(c1 + 1, n_partitions): - idx = int((c1 * n_partitions) + c2 - ((c1 + 1) * (c1 + 2) // 2)) - all_zrand[idx] = zrand(communities[:, c1], communities[:, c2]) - - return all_zrand - - -if use_numba: - _dummyvar = njit(_dummyvar) - zrand = njit(zrand) - _zrand_partitions = njit(_zrand_partitions, parallel=True) - - -def get_modularity(adjacency, comm, gamma=1): - """ - Calculate modularity contribution for each community in `comm`. - - Parameters - ---------- - adjacency : (N, N) array_like - Adjacency (e.g., correlation) matrix - comm : (N,) array_like - Community assignment vector splitting `N` subjects into `G` groups - gamma : float, optional - Resolution parameter used in original modularity maximization. 
- Default: 1 - - Returns - ------- - comm_q : (G,) ndarray - Relative modularity for each community - - See Also - -------- - netneurotools.modularity.get_modularity_z - netneurotools.modularity.get_modularity_sig - """ - adjacency, comm = np.asarray(adjacency), np.asarray(comm) - s = adjacency.sum() - B = adjacency - (gamma * np.outer(adjacency.sum(axis=1), - adjacency.sum(axis=0)) / s) - - # find modularity contribution of each community - communities = np.unique(comm) - comm_q = np.empty(shape=communities.size) - for n, ci in enumerate(communities): - inds = comm == ci - comm_q[n] = B[np.ix_(inds, inds)].sum() / s - - return comm_q - - -def get_modularity_z(adjacency, comm, gamma=1, n_perm=10000, seed=None): - """ - Calculate average z-score of community assignments by permutation. - - Parameters - ---------- - adjacency : (N, N) array_like - Adjacency (correlation) matrix - comm : (N,) array_like - Community assignment vector splitting `N` subjects into `G` groups - gamma : float, optional - Resolution parameter used in original modularity maximization. - Default: 1 - n_perm : int, optional - Number of permutations. Default: 10000 - seed : {int, np.random.RandomState instance, None}, optional - Seed for random number generation. Default: None - - Returns - ------- - q_z : float - Average Z-score of modularity of communities - - See Also - -------- - netneurotools.modularity.get_modularity - netneurotools.modularity.get_modularity_sig - """ - rs = check_random_state(seed) - - real_qs = get_modularity(adjacency, comm, gamma) - simu_qs = np.empty(shape=(np.unique(comm).size, n_perm)) - for perm in range(n_perm): - simu_qs[:, perm] = get_modularity(adjacency, - rs.permutation(comm), - gamma) - - # avoid instances where dist.std(1) == 0 - std = simu_qs.std(axis=1) - if std == 0: - return np.mean(real_qs - simu_qs.mean(axis=1)) - else: - return np.mean((real_qs - simu_qs.mean(axis=1)) / std) - - -def get_modularity_sig(adjacency, comm, gamma=1, n_perm=10000, alpha=0.01, - seed=None): - """ - Calculate significance of community assignments in `comm` by permutation. - - Parameters - ---------- - adjacency : (N, N) array_like - Adjacency (correlation) matrix - comm : (N,) array_like - Community assignment vector - gamma : float - Resolution parameter used in original modularity maximization - n_perm : int, optional - Number of permutations to test against. Default: 10000 - alpha : (0,1) float, optional - Alpha level to assess significance. Default: 0.01 - seed : {int, np.random.RandomState instance, None}, optional - Seed for random number generation. 
Default: None - - Returns - ------- - ndarray - Significance of each community in `comm` (boolean) - - See Also - -------- - netneurotools.modularity.get_modularity_z - netneurotools.modularity.get_modularity_sig - """ - rs = check_random_state(seed) - - real_qs = get_modularity(adjacency, comm, gamma) - simu_qs = np.empty(shape=(np.unique(comm).size, n_perm)) - for perm in range(n_perm): - simu_qs[:, perm] = get_modularity(adjacency, - rs.permutation(comm), - gamma) - - q_sig = real_qs > np.percentile(simu_qs, 100 * (1 - alpha), axis=1) - - return q_sig diff --git a/netneurotools/modularity/__init__.py b/netneurotools/modularity/__init__.py new file mode 100644 index 0000000..4f320cb --- /dev/null +++ b/netneurotools/modularity/__init__.py @@ -0,0 +1,25 @@ +"""Functions for working with network modularity.""" + + +from .modules import ( + match_cluster_labels, + match_assignments, + reorder_assignments, + find_consensus, + consensus_modularity, + _dummyvar, + zrand, + _zrand_partitions, + get_modularity, + get_modularity_z, + get_modularity_sig, +) + + +__all__ = [ + # modules + 'match_cluster_labels', 'match_assignments', 'reorder_assignments', + 'find_consensus', 'consensus_modularity', '_dummyvar', 'zrand', + '_zrand_partitions', 'get_modularity', 'get_modularity_z', + 'get_modularity_sig', +] \ No newline at end of file diff --git a/netneurotools/cluster.py b/netneurotools/modularity/modules.py similarity index 58% rename from netneurotools/cluster.py rename to netneurotools/modularity/modules.py index 4b46a9d..4eef241 100644 --- a/netneurotools/cluster.py +++ b/netneurotools/modularity/modules.py @@ -1,11 +1,18 @@ -# -*- coding: utf-8 -*- -"""Functions for clustering and working with cluster solutions.""" +"""Functions for working with network modules.""" import bct import numpy as np +from sklearn.utils.validation import check_random_state from scipy import optimize from scipy.cluster import hierarchy -from sklearn.utils.validation import check_random_state + +try: + from numba import njit, prange + use_numba = True +except ImportError: + prange = range + use_numba = False + def _get_relabels(c1, c2): @@ -362,3 +369,305 @@ def find_consensus(assignments, null_func=np.mean, return_agreement=False, return consensus.astype(int), agreement * (agreement > threshold) return consensus.astype(int) + + +def consensus_modularity(adjacency, gamma=1, B='modularity', + repeats=250, null_func=np.mean, seed=None): + """ + Find community assignments from `adjacency` through consensus. + + Performs `repeats` iterations of community detection on `adjacency` and + then uses consensus clustering on the resulting community assignments. + + Parameters + ---------- + adjacency : (N, N) array_like + Adjacency matrix (weighted/non-weighted) on which to perform consensus + community detection. + gamma : float, optional + Resolution parameter for modularity maximization. Default: 1 + B : str or (N, N) array_like, optional + Null model to use for consensus clustering. If `str`, must be one of + ['modularity', 'potts', 'negative_sym', 'negative_asym']. Default: + 'modularity' + repeats : int, optional + Number of times to repeat Louvain algorithm clustering. Default: 250 + null_func : callable, optional + Function used to generate null model when performing consensus-based + clustering. Must accept a 2D array as input and return a single value. + Default: `np.mean` + seed : {int, np.random.RandomState instance, None}, optional + Seed for random number generation. 
Default: None + + Returns + ------- + consensus : (N,) np.ndarray + Consensus-derived community assignments + Q_all : array_like + Optimized modularity over all `repeats` community assignments + zrand_all : array_like + z-Rand score over all pairs of `repeats` community assignment vectors + + References + ---------- + Bassett, D. S., Porter, M. A., Wymbs, N. F., Grafton, S. T., Carlson, + J. M., & Mucha, P. J. (2013). Robust detection of dynamic community + structure in networks. Chaos: An Interdisciplinary Journal of Nonlinear + Science, 23(1), 013142. + """ + # generate community partitions `repeat` times + comms, Q_all = zip(*[bct.community_louvain(adjacency, gamma=gamma, B=B) + for i in range(repeats)]) + comms = np.column_stack(comms) + + # find consensus cluster assignments across all partitoning solutions + consensus = find_consensus(comms, null_func=null_func, seed=seed) + + # get z-rand statistics for partition similarity (n.b. can take a while) + zrand_all = _zrand_partitions(comms) + + return consensus, np.array(Q_all), zrand_all + + +def _dummyvar(labels): + """ + Generate dummy-coded array from provided community assignment `labels`. + + Parameters + ---------- + labels : (N,) array_like + Labels assigning `N` samples to `G` groups + + Returns + ------- + ci : (N, G) numpy.ndarray + Dummy-coded array where 1 indicates that a sample belongs to a group + """ + comms = np.unique(labels) + + ci = np.zeros((len(labels), len(comms))) + for n, grp in enumerate(comms): + ci[:, n] = labels == grp + + return ci + + +def zrand(X, Y): + """ + Calculate the z-Rand index of two community assignments. + + Parameters + ---------- + X, Y : (n, 1) array_like + Community assignment vectors to compare + + Returns + ------- + z_rand : float + Z-rand index + + References + ---------- + Amanda L. Traud, Eric D. Kelsic, Peter J. Mucha, and Mason A. Porter. + (2011). Comparing Community Structure to Characteristics in Online + Collegiate Social Networks. SIAM Review, 53, 526-543. + """ + if X.ndim > 1 or Y.ndim > 1: + if X.shape[-1] > 1 or Y.shape[-1] > 1: + raise ValueError('X and Y must have only one-dimension each. ' + 'Please check inputs.') + + Xf = X.flatten() + Yf = Y.flatten() + + n = len(Xf) + indx, indy = _dummyvar(Xf), _dummyvar(Yf) + Xa = indx.dot(indx.T) + Ya = indy.dot(indy.T) + + M = n * (n - 1) / 2 + M1 = Xa.nonzero()[0].size / 2 + M2 = Ya.nonzero()[0].size / 2 + + wab = np.logical_and(Xa, Ya).nonzero()[0].size / 2 + + mod = n * (n**2 - 3 * n - 2) + C1 = mod - (8 * (n + 1) * M1) + (4 * np.power(indx.sum(0), 3).sum()) + C2 = mod - (8 * (n + 1) * M2) + (4 * np.power(indy.sum(0), 3).sum()) + + a = M / 16 + b = ((4 * M1 - 2 * M)**2) * ((4 * M2 - 2 * M)**2) / (256 * (M**2)) + c = C1 * C2 / (16 * n * (n - 1) * (n - 2)) + d = ((((4 * M1 - 2 * M)**2) - (4 * C1) - (4 * M)) + * (((4 * M2 - 2 * M)**2) - (4 * C2) - (4 * M)) + / (64 * n * (n - 1) * (n - 2) * (n - 3))) + + sigw2 = a - b + c + d + # catch any negatives + if sigw2 < 0: + return 0 + z_rand = (wab - ((M1 * M2) / M)) / np.sqrt(sigw2) + + return z_rand + + +def _zrand_partitions(communities): + """ + Calculate z-Rand for all pairs of assignments in `communities`. + + Iterates through every pair of community assignment vectors in + `communities` and calculates the z-Rand score to assess their similarity. 
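A small hedged illustration of the relocated ``zrand`` defined just above, mirroring the new tests; it assumes ``zrand`` is re-exported from ``netneurotools.modularity`` as listed in the package ``__init__.py``:

.. code:: python

    import numpy as np
    from netneurotools.modularity import zrand

    label = np.ones((100, 1))
    X = np.vstack((label, label * 2))     # two equal communities, labels 1/2
    Y = np.vstack((label * 2, label))     # same partition, labels swapped

    rng = np.random.RandomState(1234)
    noise = rng.choice([1.0, 2.0], size=X.shape)

    print(zrand(X, Y))       # identical partitions -> large z-Rand score
    print(zrand(X, noise))   # random partition -> much smaller score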
+ + Parameters + ---------- + communities : (S, R) array_like + Community assignments for `S` samples over `R` partitions + + Returns + ------- + all_zrand : array_like + z-Rand score over all pairs of `R` partitions of community assignments + """ + n_partitions = communities.shape[-1] + all_zrand = np.zeros(int(n_partitions * (n_partitions - 1) / 2)) + + for c1 in prange(n_partitions): + for c2 in prange(c1 + 1, n_partitions): + idx = int((c1 * n_partitions) + c2 - ((c1 + 1) * (c1 + 2) // 2)) + all_zrand[idx] = zrand(communities[:, c1], communities[:, c2]) + + return all_zrand + + +if use_numba: + _dummyvar = njit(_dummyvar) + zrand = njit(zrand) + _zrand_partitions = njit(_zrand_partitions, parallel=True) + + +def get_modularity(adjacency, comm, gamma=1): + """ + Calculate modularity contribution for each community in `comm`. + + Parameters + ---------- + adjacency : (N, N) array_like + Adjacency (e.g., correlation) matrix + comm : (N,) array_like + Community assignment vector splitting `N` subjects into `G` groups + gamma : float, optional + Resolution parameter used in original modularity maximization. + Default: 1 + + Returns + ------- + comm_q : (G,) ndarray + Relative modularity for each community + + See Also + -------- + netneurotools.modularity.get_modularity_z + netneurotools.modularity.get_modularity_sig + """ + adjacency, comm = np.asarray(adjacency), np.asarray(comm) + s = adjacency.sum() + B = adjacency - (gamma * np.outer(adjacency.sum(axis=1), + adjacency.sum(axis=0)) / s) + + # find modularity contribution of each community + communities = np.unique(comm) + comm_q = np.empty(shape=communities.size) + for n, ci in enumerate(communities): + inds = comm == ci + comm_q[n] = B[np.ix_(inds, inds)].sum() / s + + return comm_q + + +def get_modularity_z(adjacency, comm, gamma=1, n_perm=10000, seed=None): + """ + Calculate average z-score of community assignments by permutation. + + Parameters + ---------- + adjacency : (N, N) array_like + Adjacency (correlation) matrix + comm : (N,) array_like + Community assignment vector splitting `N` subjects into `G` groups + gamma : float, optional + Resolution parameter used in original modularity maximization. + Default: 1 + n_perm : int, optional + Number of permutations. Default: 10000 + seed : {int, np.random.RandomState instance, None}, optional + Seed for random number generation. Default: None + + Returns + ------- + q_z : float + Average Z-score of modularity of communities + + See Also + -------- + netneurotools.modularity.get_modularity + netneurotools.modularity.get_modularity_sig + """ + rs = check_random_state(seed) + + real_qs = get_modularity(adjacency, comm, gamma) + simu_qs = np.empty(shape=(np.unique(comm).size, n_perm)) + for perm in range(n_perm): + simu_qs[:, perm] = get_modularity(adjacency, + rs.permutation(comm), + gamma) + + # avoid instances where dist.std(1) == 0 + std = simu_qs.std(axis=1) + if std == 0: + return np.mean(real_qs - simu_qs.mean(axis=1)) + else: + return np.mean((real_qs - simu_qs.mean(axis=1)) / std) + + +def get_modularity_sig(adjacency, comm, gamma=1, n_perm=10000, alpha=0.01, + seed=None): + """ + Calculate significance of community assignments in `comm` by permutation. + + Parameters + ---------- + adjacency : (N, N) array_like + Adjacency (correlation) matrix + comm : (N,) array_like + Community assignment vector + gamma : float + Resolution parameter used in original modularity maximization + n_perm : int, optional + Number of permutations to test against. 
Default: 10000 + alpha : (0,1) float, optional + Alpha level to assess significance. Default: 0.01 + seed : {int, np.random.RandomState instance, None}, optional + Seed for random number generation. Default: None + + Returns + ------- + ndarray + Significance of each community in `comm` (boolean) + + See Also + -------- + netneurotools.modularity.get_modularity_z + netneurotools.modularity.get_modularity_sig + """ + rs = check_random_state(seed) + + real_qs = get_modularity(adjacency, comm, gamma) + simu_qs = np.empty(shape=(np.unique(comm).size, n_perm)) + for perm in range(n_perm): + simu_qs[:, perm] = get_modularity(adjacency, + rs.permutation(comm), + gamma) + + q_sig = real_qs > np.percentile(simu_qs, 100 * (1 - alpha), axis=1) + + return q_sig \ No newline at end of file diff --git a/netneurotools/modularity/tests/__init__.py b/netneurotools/modularity/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/netneurotools/tests/test_cluster.py b/netneurotools/modularity/tests/test_modules.py similarity index 58% rename from netneurotools/tests/test_cluster.py rename to netneurotools/modularity/tests/test_modules.py index 59b9f8a..12b82cd 100644 --- a/netneurotools/tests/test_cluster.py +++ b/netneurotools/modularity/tests/test_modules.py @@ -1,13 +1,13 @@ -# -*- coding: utf-8 -*- -"""For testing netneurotools.cluster functionality.""" +"""For testing netneurotools.modularity.modules functionality.""" import bct -import numpy as np import pytest +import numpy as np from sklearn.cluster import k_means, spectral_clustering -from netneurotools import cluster +from netneurotools import modularity +rs = np.random.RandomState(1234) @pytest.mark.parametrize('c1, c2, out', [ # uniform communities @@ -28,10 +28,12 @@ np.array([1, 1, 1, 3, 3, 3, 2, 2, 2])) ]) def test_match_cluster_labels(c1, c2, out): - assert np.all(cluster.match_cluster_labels(c1, c2) == out) + """Test matching of cluster labels.""" + assert np.all(modularity.match_cluster_labels(c1, c2) == out) def test_match_assignments(): + """Test matching of clustering assignments.""" # generate some random data to be clustered (must be symmetric) rs = np.random.RandomState(1234) data = rs.rand(100, 100) @@ -48,7 +50,7 @@ def test_match_assignments(): # match labels and assert that we got perfect matches (this is not 100% # guaranteed with spectral clustering but it is...pretty likely) - matched = cluster.match_assignments(assignments, seed=rs) + matched = modularity.match_assignments(assignments, seed=rs) assert np.all(matched[:, [0]] == matched) # check that we didn't _actually_ change cluster assignments with matching; @@ -58,6 +60,7 @@ def test_match_assignments(): def test_reorder_assignments(): + """Test re-ordering of clustering assignments.""" # generate a bunch of ~random(ish) clustering assignments that have a bit # of consistency but aren't all identical rs = np.random.RandomState(1234) @@ -72,11 +75,11 @@ def test_reorder_assignments(): # (we're re-labelling the matrix but k-means does not provide stable # clustering assignments so we shouldn't get identical assignments even # after "matching") - reordered, idx = cluster.reorder_assignments(assignments, seed=1234) + reordered, idx = modularity.reorder_assignments(assignments, seed=1234) assert not np.all(reordered[:, [0]] == reordered) # make sure that the returned idx does exactly what it's supposed to - matched = cluster.match_assignments(assignments, seed=1234)[idx] + matched = modularity.match_assignments(assignments, seed=1234)[idx] assert 
np.all(matched == reordered) @@ -87,4 +90,44 @@ def test_reorder_assignments(): np.array([1, 1, 1, 2, 2, 2, 3, 3, 3])) ]) def test_find_consensus(assignments, clusters): - assert np.all(cluster.find_consensus(assignments) == clusters) + """Test finding consensus clustering.""" + assert np.all(modularity.find_consensus(assignments) == clusters) + + +def test_dummyvar(): + """Test generation of dummy variables.""" + # generate small example dummy variable code + out = modularity._dummyvar(np.array([1, 1, 2, 3, 3])) + assert np.all(out == np.array([[1, 0, 0], + [1, 0, 0], + [0, 1, 0], + [0, 0, 1], + [0, 0, 1]])) + + allones = np.array([1, 1, 1, 1, 1, 1, 1, 1]) + assert np.all(modularity._dummyvar(allones) == allones) + + +def test_zrand(): + """Test calculation of zrand.""" + # make the same two-group community assignments (with different labels) + label = np.ones((100, 1)) + X, Y = np.vstack((label, label * 2)), np.vstack((label * 2, label)) + # compare + assert modularity.zrand(X, Y) == modularity.zrand(X, Y[::-1]) + random = rs.choice([0, 1], size=X.shape) + assert modularity.zrand(X, Y) > modularity.zrand(X, random) + assert modularity.zrand(X, Y) == modularity.zrand(X[:, 0], Y[:, 0]) + + +def test_zrand_partitions(): + """Test calculation of zrand for partitions.""" + # make random communities + comm = rs.choice(range(6), size=(10, 100)) + all_diff = modularity._zrand_partitions(comm) + all_same = modularity._zrand_partitions(np.repeat(comm[:, [0]], 10, axis=1)) + + # partition of labels that are all the same should have higher average + # zrand and lower stdev zrand + assert np.nanmean(all_same) > np.nanmean(all_diff) + assert np.nanstd(all_same) < np.nanstd(all_diff) \ No newline at end of file diff --git a/netneurotools/networks/__init__.py b/netneurotools/networks/__init__.py new file mode 100644 index 0000000..fb7d82a --- /dev/null +++ b/netneurotools/networks/__init__.py @@ -0,0 +1,33 @@ +"""Functions for constucting networks.""" + + +from .consensus import ( + func_consensus, struct_consensus +) + + +from .randomize import ( + randmio_und, + match_length_degree_distribution, + strength_preserving_rand_sa, + strength_preserving_rand_sa_mse_opt, + strength_preserving_rand_sa_dir +) + + +from .networks_utils import ( + binarize_network, threshold_network, get_triu +) + + +__all__ = [ + # consensus + 'func_consensus', 'struct_consensus', + # generative + # randomize + 'randmio_und', 'match_length_degree_distribution', + 'strength_preserving_rand_sa', 'strength_preserving_rand_sa_mse_opt', + 'strength_preserving_rand_sa_dir', + # networks_utils + 'binarize_network', 'threshold_network', 'get_triu' +] diff --git a/netneurotools/networks/consensus.py b/netneurotools/networks/consensus.py new file mode 100644 index 0000000..b091110 --- /dev/null +++ b/netneurotools/networks/consensus.py @@ -0,0 +1,295 @@ +"""Functions for generating consensus networks.""" + +import numpy as np +from sklearn.utils.validation import ( + check_random_state, check_array, check_consistent_length +) + +def func_consensus(data, n_boot=1000, ci=95, seed=None): + """ + Calculate thresholded group consensus functional connectivity graph. + + This function concatenates all time series in `data` and computes a group + correlation matrix based on this extended time series. It then generates + length `T` bootstrapped samples from the concatenated matrix and estimates + confidence intervals for all correlations. 
Correlations whose sign is + consistent across bootstraps are retained; inconsistent correlations are + set to zero. + + If `n_boot` is set to 0 or None a simple, group-averaged functional + connectivity matrix is estimated, instead. + + Parameters + ---------- + data : (N, T, S) array_like (or a list of S arrays, each shaped as (N, T)) + Pre-processed functional time series, where `N` is the number of nodes, + `T` is the number of volumes in the time series, and `S` is the number + of subjects. + n_boot : int, optional + Number of bootstraps for which to generate correlation. Default: 1000 + ci : (0, 100) float, optional + Confidence interval for which to assess the reliability of correlations + with bootstraps. Default: 95 + seed : int, optional + Random seed. Default: None + + Returns + ------- + consensus : (N, N) numpy.ndarray + Thresholded, group-level correlation matrix + + References + ---------- + Mišić, B., Betzel, R. F., Nematzadeh, A., Goni, J., Griffa, A., Hagmann, + P., Flammini, A., Ahn, Y.-Y., & Sporns, O. (2015). Cooperative and + competitive spreading dynamics on the human connectome. Neuron, 86(6), + 1518-1529. + """ + # check inputs + rs = check_random_state(seed) + if ci > 100 or ci < 0: + raise ValueError("`ci` must be between 0 and 100.") + + # group-average functional connectivity matrix desired instead of bootstrap + if n_boot == 0 or n_boot is None: + if isinstance(data, list): + corrs = [np.corrcoef(sub) for sub in data] + else: + corrs = [np.corrcoef(data[..., sub]) for sub in + range(data.shape[-1])] + return np.nanmean(corrs, axis=0) + + if isinstance(data, list): + collapsed_data = np.hstack(data) + nsample = int(collapsed_data.shape[-1] / len(data)) + else: + collapsed_data = data.reshape((len(data), -1), order='F') + nsample = data.shape[1] + + consensus = np.corrcoef(collapsed_data) + + # only keep the upper triangle for the bootstraps to save on memory usage + triu_inds = np.triu_indices_from(consensus, k=1) + bootstrapped_corrmat = np.zeros((len(triu_inds[0]), n_boot)) + + # generate `n_boot` bootstrap correlation matrices by sampling `t` time + # points from the concatenated time series + for boot in range(n_boot): + inds = rs.randint(collapsed_data.shape[-1], size=nsample) + bootstrapped_corrmat[..., boot] = \ + np.corrcoef(collapsed_data[:, inds])[triu_inds] + + # extract the CIs from the bootstrapped correlation matrices + # we don't need the input anymore so overwrite it + bootstrapped_ci = np.percentile(bootstrapped_corrmat, [100 - ci, ci], + axis=-1, overwrite_input=True) + + # remove unreliable (i.e., CI zero-crossing) correlations + # if the signs of the bootstrapped confidence intervals are different + # (i.e., their signs sum to 0), then we want to remove them + # so, take the logical not of the CI (CI = 0 ---> True) and create a mask + # then, set all connections from the consensus array inside the mask to 0 + remove_inds = np.logical_not(np.sign(bootstrapped_ci).sum(axis=0)) + mask = np.zeros_like(consensus, dtype=bool) + mask[triu_inds] = remove_inds + consensus[mask + mask.T] = 0 + + return consensus + + +def _ecdf(data): + """ + Estimate empirical cumulative distribution function of `data`. + + Taken directly from StackOverflow. See original answer at + https://stackoverflow.com/questions/33345780. 
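A hedged usage sketch for ``func_consensus`` as moved into ``netneurotools/networks/consensus.py`` (random time series as placeholders; a small ``n_boot`` keeps the example quick):

.. code:: python

    import numpy as np
    from netneurotools.networks import func_consensus

    rng = np.random.RandomState(1234)
    data = rng.rand(50, 200, 5)        # 50 nodes, 200 timepoints, 5 subjects

    fc = func_consensus(data, n_boot=50, ci=95, seed=1234)
    print(fc.shape)                    # (50, 50)

    # n_boot=None skips bootstrapping and returns the group-averaged matrix
    fc_avg = func_consensus(data, n_boot=None)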
+ + Parameters + ---------- + data : array_like + + Returns + ------- + prob : numpy.ndarray + Cumulative probability + quantiles : numpy.darray + Quantiles + """ + sample = np.atleast_1d(data) + + # find the unique values and their corresponding counts + quantiles, counts = np.unique(sample, return_counts=True) + + # take the cumulative sum of the counts and divide by the sample size to + # get the cumulative probabilities between 0 and 1 + prob = np.cumsum(counts).astype(float) / sample.size + + # match MATLAB + prob, quantiles = np.append([0], prob), np.append(quantiles[0], quantiles) + + return prob, quantiles + + +def struct_consensus(data, distance, hemiid, + conn_num_inter=None, + conn_num_intra=None, + weighted=False): + """ + Calculate distance-dependent group consensus structural connectivity graph. + + Takes as input a weighted stack of connectivity matrices with dimensions + (N, N, S) where `N` is the number of nodes and `S` is the number of + matrices or subjects. The matrices must be weighted, and ideally with + continuous weights (e.g. fractional anisotropy rather than streamline + count). The second input is a pairwise distance matrix, where distance(i,j) + is the Euclidean distance between nodes i and j. The final input is an + (N, 1) vector which labels nodes as belonging to the right (`hemiid==0`) or + left (`hemiid=1`) hemisphere (note that these values can be flipped as long + as `hemiid` contains only values of 0 and 1). + + This function estimates the average edge length distribution and builds + a group-averaged connectivity matrix that approximates this distribution + with density equal to the mean density across subjects. + + The algorithm works as follows: + + 1. Estimate the cumulative edge length distribution, + 2. Divide the distribution into M length bins, one for each edge that will + be added to the group-average matrix, and + 3. Within each bin, select the edge that is most consistently expressed + expressed across subjects, breaking ties according to average edge + weight (which is why the input matrix `data` must be weighted). + + The algorithm works separately on within/between hemisphere links. + M is the sum of `conn_num_inter` and `conn_num_intra`, if provided. + Otherwise, M is estimated from the data. + + Parameters + ---------- + data : (N, N, S) array_like + Weighted connectivity matrices (i.e., fractional anisotropy), where `N` + is nodes and `S` is subjects + distance : (N, N) array_like + Array where `distance[i, j]` is the Euclidean distance between nodes + `i` and `j` + hemiid : (N, 1) array_like + Hemisphere designation for `N` nodes where a value of 0/1 indicates + node `N_{i}` is in the right/left hemisphere, respectively + conn_num_inter : int, optional + Number of inter-hemispheric connections to include in the consensus + matrix. If `None`, the number of inter-hemispheric connections will be + estimated from the data. Default = `None`. + conn_num_intra : int, optional + Number of intra-hemispheric connections to include in the consensus + matrix. If `None`, the number of intra-hemispheric connections will be + estimated from the data. Default = `None`. + weighted : bool + Flag indicating whether or not to return a weighted consensus map. If + `True`, the consensus will be multiplied by the mean of `data`. + + Returns + ------- + consensus : (N, N) numpy.ndarray + Binary (default) or mean-weighted group-level connectivity matrix + + References + ---------- + Betzel, R. F., Griffa, A., Hagmann, P., & Mišić, B. (2018). 
Distance- + dependent consensus thresholds for generating group-representative + structural brain networks. Network Neuroscience, 1-22. + """ + # confirm input shapes are as expected + check_consistent_length(data, distance, hemiid) + try: + hemiid = check_array(hemiid, ensure_2d=True) + except ValueError: + raise ValueError('Provided hemiid must be a 2D array. Reshape your ' + 'data using array.reshape(-1, 1) and try again.') from None + + num_node, _, num_sub = data.shape # info on connectivity matrices + pos_data = data > 0 # location of + values in matrix + pos_data_count = pos_data.sum(axis=2) # num sub with + values at each node + + with np.errstate(divide='ignore', invalid='ignore'): + average_weights = data.sum(axis=2) / pos_data_count + + # empty array to hold inter/intra hemispheric connections + consensus = np.zeros((num_node, num_node, 2)) + + for conn_type in range(2): # iterate through inter/intra hemisphere conn + if conn_type == 0: # get inter hemisphere edges + inter_hemi = (hemiid == 0) @ (hemiid == 1).T + keep_conn = np.logical_or(inter_hemi, inter_hemi.T) + else: # get intra hemisphere edges + right_hemi = (hemiid == 0) @ (hemiid == 0).T + left_hemi = (hemiid == 1) @ (hemiid == 1).T + keep_conn = np.logical_or(right_hemi @ right_hemi.T, + left_hemi @ left_hemi.T) + + # mask the distance array for only those edges we want to examine + full_dist_conn = distance * keep_conn + upper_dist_conn = np.atleast_3d(np.triu(full_dist_conn)) + + # generate array of weighted (by distance), positive edges across subs + pos_dist = pos_data * upper_dist_conn + pos_dist = pos_dist[np.nonzero(pos_dist)] + + # determine average # of positive edges across subs + # we will use this to bin the edge weights + if conn_type == 0: + if conn_num_inter is None: + avg_conn_num = len(pos_dist) / num_sub + else: + avg_conn_num = conn_num_inter + else: + if conn_num_intra is None: + avg_conn_num = len(pos_dist) / num_sub + else: + avg_conn_num = conn_num_intra + + # estimate empirical CDF of weighted, positive edges across subs + cumprob, quantiles = _ecdf(pos_dist) + cumprob = np.round(cumprob * avg_conn_num).astype(int) + + # empty array to hold group-average matrix for current connection type + # (i.e., inter/intra hemispheric connections) + group_conn_type = np.zeros((num_node, num_node)) + + # iterate through bins (for edge weights) + for n in range(1, int(avg_conn_num) + 1): + # get current quantile of interest + curr_quant = quantiles[np.logical_and(cumprob >= (n - 1), + cumprob < n)] + if curr_quant.size == 0: + continue + + # find edges in distance connectivity matrix w/i current quantile + mask = np.logical_and(full_dist_conn >= curr_quant.min(), + full_dist_conn <= curr_quant.max()) + i, j = np.where(np.triu(mask)) # indices of edges of interest + + c = pos_data_count[i, j] # get num sub with + values at edges + w = average_weights[i, j] # get averaged weight of edges + + # find locations of edges most commonly represented across subs + indmax = np.argwhere(c == c.max()) + + # determine index of most frequent edge; break ties with higher + # weighted edge + if indmax.size == 1: # only one edge found + group_conn_type[i[indmax], j[indmax]] = 1 + else: # multiple edges found + indmax = indmax[np.argmax(w[indmax])] + group_conn_type[i[indmax], j[indmax]] = 1 + + consensus[:, :, conn_type] = group_conn_type + + # collapse across hemispheric connections types and make symmetrical array + consensus = consensus.sum(axis=2) + consensus = np.logical_or(consensus, consensus.T).astype(int) + + if 
weighted: + consensus = consensus * np.mean(data, axis=2) + return consensus + + diff --git a/netneurotools/networks/generative.py b/netneurotools/networks/generative.py new file mode 100644 index 0000000..7a7bff4 --- /dev/null +++ b/netneurotools/networks/generative.py @@ -0,0 +1 @@ +"""Functions for generative network models.""" diff --git a/netneurotools/networks/networks_utils.py b/netneurotools/networks/networks_utils.py new file mode 100644 index 0000000..bbf401f --- /dev/null +++ b/netneurotools/networks/networks_utils.py @@ -0,0 +1,127 @@ +"""Functions for supporting network constuction.""" + +import numpy as np +from scipy.sparse import csgraph + +def get_triu(data, k=1): + """ + Return vectorized version of upper triangle from `data`. + + Parameters + ---------- + data : (N, N) array_like + Input data + k : int, optional + Which diagonal to select from (where primary diagonal is 0). Default: 1 + + Returns + ------- + triu : (N * N-1 / 2) numpy.ndarray + Upper triangle of `data` + + Examples + -------- + >>> from netneurotools import utils + + >>> X = np.array([[1, 0.5, 0.25], [0.5, 1, 0.33], [0.25, 0.33, 1]]) + >>> tri = utils.get_triu(X) + >>> tri + array([0.5 , 0.25, 0.33]) + """ + return data[np.triu_indices(len(data), k=k)].copy() + + +def binarize_network(network, retain=10, keep_diag=False): + """ + Keep top `retain` % of connections in `network` and binarizes. + + Uses the upper triangle for determining connection percentage, which may + result in disconnected nodes. If this behavior is not desired see + :py:func:`netneurotools.networks.threshold_network`. + + Parameters + ---------- + network : (N, N) array_like + Input graph + retain : [0, 100] float, optional + Percent connections to retain. Default: 10 + keep_diag : bool, optional + Whether to keep the diagonal instead of setting it to 0. Default: False + + Returns + ------- + binarized : (N, N) numpy.ndarray + Binarized, thresholded graph + + See Also + -------- + netneurotools.networks.threshold_network + """ + if retain < 0 or retain > 100: + raise ValueError('Value provided for `retain` is outside [0, 100]: {}' + .format(retain)) + + prctile = 100 - retain + triu = get_triu(network) + thresh = np.percentile(triu, prctile, axis=0, keepdims=True) + binarized = np.array(network > thresh, dtype=int) + + if not keep_diag: + binarized[np.diag_indices(len(binarized))] = 0 + + return binarized + + +def threshold_network(network, retain=10): + """ + Keep top `retain` % of connections in `network` and binarizes. + + Uses a minimum spanning tree to ensure that no nodes are disconnected from + the resulting thresholded graph + + Parameters + ---------- + network : (N, N) array_like + Input graph + retain : [0, 100] float, optional + Percent connections to retain. Default: 10 + + Returns + ------- + thresholded : (N, N) numpy.ndarray + Binarized, thresholded graph + + See Also + -------- + netneurotools.networks.binarize_network + """ + if retain < 0 or retain > 100: + raise ValueError('Value provided for `retain` must be a percent ' + 'in range [0, 100]. 
Provided: {}'.format(retain)) + + # get number of nodes in graph and invert weights (MINIMUM spanning tree) + nodes = len(network) + graph = np.triu(network * -1) + + # find MST and count # of edges in graph + mst = csgraph.minimum_spanning_tree(graph).todense() + mst_edges = np.sum(mst != 0) + + # determine # of remaining edges and ensure we're not over the limit + remain = int((retain / 100) * ((nodes * (nodes - 1)) / 2)) - mst_edges + if remain < 0: + raise ValueError('Minimum spanning tree with {} edges exceeds desired ' + 'connection density of {}% ({} edges). Cannot ' + 'proceed with graph creation.' + .format(mst_edges, retain, remain + mst_edges)) + + # zero out edges already in MST and then get indices of next best edges + graph -= mst + inds = get_triu(graph).argsort()[:remain] + inds = tuple(e[inds] for e in np.triu_indices_from(graph, k=1)) + + # add edges to MST, symmetrize, and convert to binary matrix + mst[inds] = graph[inds] + mst = np.array((mst + mst.T) != 0, dtype=int) + + return mst \ No newline at end of file diff --git a/netneurotools/networks.py b/netneurotools/networks/randomize.py similarity index 65% rename from netneurotools/networks.py rename to netneurotools/networks/randomize.py index 1803f8c..44227d0 100644 --- a/netneurotools/networks.py +++ b/netneurotools/networks/randomize.py @@ -1,14 +1,11 @@ -# -*- coding: utf-8 -*- -"""Functions for generating group-level networks from individual measurements.""" +"""Functions for generating randomized networks.""" import bct import numpy as np from tqdm import tqdm -from scipy.sparse import csgraph -from sklearn.utils.validation import (check_random_state, check_array, - check_consistent_length) - -from . import utils +from sklearn.utils.validation import ( + check_random_state, check_array, check_consistent_length +) try: from numba import njit @@ -17,388 +14,90 @@ use_numba = False -def func_consensus(data, n_boot=1000, ci=95, seed=None): - """ - Calculate thresholded group consensus functional connectivity graph. - - This function concatenates all time series in `data` and computes a group - correlation matrix based on this extended time series. It then generates - length `T` bootstrapped samples from the concatenated matrix and estimates - confidence intervals for all correlations. Correlations whose sign is - consistent across bootstraps are retained; inconsistent correlations are - set to zero. - - If `n_boot` is set to 0 or None a simple, group-averaged functional - connectivity matrix is estimated, instead. - - Parameters - ---------- - data : (N, T, S) array_like (or a list of S arrays, each shaped as (N, T)) - Pre-processed functional time series, where `N` is the number of nodes, - `T` is the number of volumes in the time series, and `S` is the number - of subjects. - n_boot : int, optional - Number of bootstraps for which to generate correlation. Default: 1000 - ci : (0, 100) float, optional - Confidence interval for which to assess the reliability of correlations - with bootstraps. Default: 95 - seed : int, optional - Random seed. Default: None - - Returns - ------- - consensus : (N, N) numpy.ndarray - Thresholded, group-level correlation matrix - - References - ---------- - Mišić, B., Betzel, R. F., Nematzadeh, A., Goni, J., Griffa, A., Hagmann, - P., Flammini, A., Ahn, Y.-Y., & Sporns, O. (2015). Cooperative and - competitive spreading dynamics on the human connectome. Neuron, 86(6), - 1518-1529. 
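The thresholding helpers gathered into ``networks_utils.py`` can be exercised on a random symmetric matrix; a hedged sketch assuming the ``netneurotools.networks`` re-exports shown in the new ``__init__.py``:

.. code:: python

    import numpy as np
    from netneurotools.networks import (
        get_triu, binarize_network, threshold_network
    )

    rng = np.random.RandomState(1234)
    W = rng.rand(60, 60)
    W = (W + W.T) / 2                     # symmetric weighted graph
    np.fill_diagonal(W, 0)

    print(get_triu(W).shape)              # (1770,) upper-triangle vector

    B = binarize_network(W, retain=10)    # top 10% of edges; nodes may disconnect
    T = threshold_network(W, retain=10)   # MST-backed; graph stays connected
    print(B.sum() // 2, T.sum() // 2)     # number of retained edges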
- """ - # check inputs - rs = check_random_state(seed) - if ci > 100 or ci < 0: - raise ValueError("`ci` must be between 0 and 100.") - - # group-average functional connectivity matrix desired instead of bootstrap - if n_boot == 0 or n_boot is None: - if isinstance(data, list): - corrs = [np.corrcoef(sub) for sub in data] - else: - corrs = [np.corrcoef(data[..., sub]) for sub in - range(data.shape[-1])] - return np.nanmean(corrs, axis=0) - - if isinstance(data, list): - collapsed_data = np.hstack(data) - nsample = int(collapsed_data.shape[-1] / len(data)) - else: - collapsed_data = data.reshape((len(data), -1), order='F') - nsample = data.shape[1] - - consensus = np.corrcoef(collapsed_data) - - # only keep the upper triangle for the bootstraps to save on memory usage - triu_inds = np.triu_indices_from(consensus, k=1) - bootstrapped_corrmat = np.zeros((len(triu_inds[0]), n_boot)) - - # generate `n_boot` bootstrap correlation matrices by sampling `t` time - # points from the concatenated time series - for boot in range(n_boot): - inds = rs.randint(collapsed_data.shape[-1], size=nsample) - bootstrapped_corrmat[..., boot] = \ - np.corrcoef(collapsed_data[:, inds])[triu_inds] - - # extract the CIs from the bootstrapped correlation matrices - # we don't need the input anymore so overwrite it - bootstrapped_ci = np.percentile(bootstrapped_corrmat, [100 - ci, ci], - axis=-1, overwrite_input=True) - - # remove unreliable (i.e., CI zero-crossing) correlations - # if the signs of the bootstrapped confidence intervals are different - # (i.e., their signs sum to 0), then we want to remove them - # so, take the logical not of the CI (CI = 0 ---> True) and create a mask - # then, set all connections from the consensus array inside the mask to 0 - remove_inds = np.logical_not(np.sign(bootstrapped_ci).sum(axis=0)) - mask = np.zeros_like(consensus, dtype=bool) - mask[triu_inds] = remove_inds - consensus[mask + mask.T] = 0 - - return consensus - - -def _ecdf(data): - """ - Estimate empirical cumulative distribution function of `data`. - - Taken directly from StackOverflow. See original answer at - https://stackoverflow.com/questions/33345780. - - Parameters - ---------- - data : array_like - - Returns - ------- - prob : numpy.ndarray - Cumulative probability - quantiles : numpy.darray - Quantiles - """ - sample = np.atleast_1d(data) - - # find the unique values and their corresponding counts - quantiles, counts = np.unique(sample, return_counts=True) - - # take the cumulative sum of the counts and divide by the sample size to - # get the cumulative probabilities between 0 and 1 - prob = np.cumsum(counts).astype(float) / sample.size - - # match MATLAB - prob, quantiles = np.append([0], prob), np.append(quantiles[0], quantiles) - - return prob, quantiles - - -def struct_consensus(data, distance, hemiid, - conn_num_inter=None, - conn_num_intra=None, - weighted=False): - """ - Calculate distance-dependent group consensus structural connectivity graph. - - Takes as input a weighted stack of connectivity matrices with dimensions - (N, N, S) where `N` is the number of nodes and `S` is the number of - matrices or subjects. The matrices must be weighted, and ideally with - continuous weights (e.g. fractional anisotropy rather than streamline - count). The second input is a pairwise distance matrix, where distance(i,j) - is the Euclidean distance between nodes i and j. 
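For ``struct_consensus`` (now also in ``networks/consensus.py``), a hedged sketch with synthetic weighted matrices; real inputs would be, e.g., FA-weighted structural connectomes:

.. code:: python

    import numpy as np
    from netneurotools.networks import struct_consensus

    rng = np.random.RandomState(1234)
    n_node, n_sub = 40, 5

    sc = rng.rand(n_node, n_node, n_sub)
    sc = (sc + sc.transpose(1, 0, 2)) / 2        # symmetric weighted matrices

    dist = rng.rand(n_node, n_node)
    dist = (dist + dist.T) / 2                   # placeholder Euclidean distances
    np.fill_diagonal(dist, 0)

    hemiid = np.array([0] * 20 + [1] * 20).reshape(-1, 1)

    consensus = struct_consensus(sc, dist, hemiid)                   # binary
    consensus_w = struct_consensus(sc, dist, hemiid, weighted=True)  # weighted
    print(consensus.shape, int(consensus.sum()) // 2)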
The final input is an - (N, 1) vector which labels nodes as belonging to the right (`hemiid==0`) or - left (`hemiid=1`) hemisphere (note that these values can be flipped as long - as `hemiid` contains only values of 0 and 1). - - This function estimates the average edge length distribution and builds - a group-averaged connectivity matrix that approximates this distribution - with density equal to the mean density across subjects. - - The algorithm works as follows: - - 1. Estimate the cumulative edge length distribution, - 2. Divide the distribution into M length bins, one for each edge that will - be added to the group-average matrix, and - 3. Within each bin, select the edge that is most consistently expressed - expressed across subjects, breaking ties according to average edge - weight (which is why the input matrix `data` must be weighted). - - The algorithm works separately on within/between hemisphere links. - M is the sum of `conn_num_inter` and `conn_num_intra`, if provided. - Otherwise, M is estimated from the data. - - Parameters - ---------- - data : (N, N, S) array_like - Weighted connectivity matrices (i.e., fractional anisotropy), where `N` - is nodes and `S` is subjects - distance : (N, N) array_like - Array where `distance[i, j]` is the Euclidean distance between nodes - `i` and `j` - hemiid : (N, 1) array_like - Hemisphere designation for `N` nodes where a value of 0/1 indicates - node `N_{i}` is in the right/left hemisphere, respectively - conn_num_inter : int, optional - Number of inter-hemispheric connections to include in the consensus - matrix. If `None`, the number of inter-hemispheric connections will be - estimated from the data. Default = `None`. - conn_num_intra : int, optional - Number of intra-hemispheric connections to include in the consensus - matrix. If `None`, the number of intra-hemispheric connections will be - estimated from the data. Default = `None`. - weighted : bool - Flag indicating whether or not to return a weighted consensus map. If - `True`, the consensus will be multiplied by the mean of `data`. - - Returns - ------- - consensus : (N, N) numpy.ndarray - Binary (default) or mean-weighted group-level connectivity matrix - - References - ---------- - Betzel, R. F., Griffa, A., Hagmann, P., & Mišić, B. (2018). Distance- - dependent consensus thresholds for generating group-representative - structural brain networks. Network Neuroscience, 1-22. +def randmio_und(W, itr): """ - # confirm input shapes are as expected - check_consistent_length(data, distance, hemiid) - try: - hemiid = check_array(hemiid, ensure_2d=True) - except ValueError: - raise ValueError('Provided hemiid must be a 2D array. 
Reshape your ' - 'data using array.reshape(-1, 1) and try again.') from None - - num_node, _, num_sub = data.shape # info on connectivity matrices - pos_data = data > 0 # location of + values in matrix - pos_data_count = pos_data.sum(axis=2) # num sub with + values at each node - - with np.errstate(divide='ignore', invalid='ignore'): - average_weights = data.sum(axis=2) / pos_data_count - - # empty array to hold inter/intra hemispheric connections - consensus = np.zeros((num_node, num_node, 2)) - - for conn_type in range(2): # iterate through inter/intra hemisphere conn - if conn_type == 0: # get inter hemisphere edges - inter_hemi = (hemiid == 0) @ (hemiid == 1).T - keep_conn = np.logical_or(inter_hemi, inter_hemi.T) - else: # get intra hemisphere edges - right_hemi = (hemiid == 0) @ (hemiid == 0).T - left_hemi = (hemiid == 1) @ (hemiid == 1).T - keep_conn = np.logical_or(right_hemi @ right_hemi.T, - left_hemi @ left_hemi.T) - - # mask the distance array for only those edges we want to examine - full_dist_conn = distance * keep_conn - upper_dist_conn = np.atleast_3d(np.triu(full_dist_conn)) - - # generate array of weighted (by distance), positive edges across subs - pos_dist = pos_data * upper_dist_conn - pos_dist = pos_dist[np.nonzero(pos_dist)] - - # determine average # of positive edges across subs - # we will use this to bin the edge weights - if conn_type == 0: - if conn_num_inter is None: - avg_conn_num = len(pos_dist) / num_sub - else: - avg_conn_num = conn_num_inter - else: - if conn_num_intra is None: - avg_conn_num = len(pos_dist) / num_sub - else: - avg_conn_num = conn_num_intra - - # estimate empirical CDF of weighted, positive edges across subs - cumprob, quantiles = _ecdf(pos_dist) - cumprob = np.round(cumprob * avg_conn_num).astype(int) - - # empty array to hold group-average matrix for current connection type - # (i.e., inter/intra hemispheric connections) - group_conn_type = np.zeros((num_node, num_node)) - - # iterate through bins (for edge weights) - for n in range(1, int(avg_conn_num) + 1): - # get current quantile of interest - curr_quant = quantiles[np.logical_and(cumprob >= (n - 1), - cumprob < n)] - if curr_quant.size == 0: - continue - - # find edges in distance connectivity matrix w/i current quantile - mask = np.logical_and(full_dist_conn >= curr_quant.min(), - full_dist_conn <= curr_quant.max()) - i, j = np.where(np.triu(mask)) # indices of edges of interest - - c = pos_data_count[i, j] # get num sub with + values at edges - w = average_weights[i, j] # get averaged weight of edges - - # find locations of edges most commonly represented across subs - indmax = np.argwhere(c == c.max()) - - # determine index of most frequent edge; break ties with higher - # weighted edge - if indmax.size == 1: # only one edge found - group_conn_type[i[indmax], j[indmax]] = 1 - else: # multiple edges found - indmax = indmax[np.argmax(w[indmax])] - group_conn_type[i[indmax], j[indmax]] = 1 - - consensus[:, :, conn_type] = group_conn_type - - # collapse across hemispheric connections types and make symmetrical array - consensus = consensus.sum(axis=2) - consensus = np.logical_or(consensus, consensus.T).astype(int) - - if weighted: - consensus = consensus * np.mean(data, axis=2) - return consensus - + Optimized version of randmio_und. -def binarize_network(network, retain=10, keep_diag=False): - """ - Keep top `retain` % of connections in `network` and binarizes. + This function randomizes an undirected network, while preserving the + degree distribution. 
The function does not preserve the strength + distribution in weighted networks. - Uses the upper triangle for determining connection percentage, which may - result in disconnected nodes. If this behavior is not desired see - :py:func:`netneurotools.networks.threshold_network`. + This function is significantly faster if numba is enabled, because + the main overhead is `np.random.randint`, see `here `_ Parameters ---------- - network : (N, N) array_like - Input graph - retain : [0, 100] float, optional - Percent connections to retain. Default: 10 - keep_diag : bool, optional - Whether to keep the diagonal instead of setting it to 0. Default: False + W : (N, N) array-like + Undirected binary/weighted connection matrix + itr : int + rewiring parameter. Each edge is rewired approximately itr times. Returns ------- - binarized : (N, N) numpy.ndarray - Binarized, thresholded graph - - See Also - -------- - netneurotools.networks.threshold_network - """ - if retain < 0 or retain > 100: - raise ValueError('Value provided for `retain` is outside [0, 100]: {}' - .format(retain)) - - prctile = 100 - retain - triu = utils.get_triu(network) - thresh = np.percentile(triu, prctile, axis=0, keepdims=True) - binarized = np.array(network > thresh, dtype=int) - - if not keep_diag: - binarized[np.diag_indices(len(binarized))] = 0 - - return binarized - - -def threshold_network(network, retain=10): - """ - Keep top `retain` % of connections in `network` and binarizes. - - Uses a minimum spanning tree to ensure that no nodes are disconnected from - the resulting thresholded graph - - Parameters - ---------- - network : (N, N) array_like - Input graph - retain : [0, 100] float, optional - Percent connections to retain. Default: 10 + W : (N, N) array-like + Randomized network + eff : int + number of actual rewirings carried out + """ # noqa: E501 + W = W.copy() + n = len(W) + i, j = np.where(np.triu(W > 0, 1)) + k = len(i) + itr *= k - Returns - ------- - thresholded : (N, N) numpy.ndarray - Binarized, thresholded graph + # maximum number of rewiring attempts per iteration + max_attempts = np.round(n * k / (n * (n - 1))) + # actual number of successful rewirings + eff = 0 - See Also - -------- - netneurotools.networks.binarize_network - """ - if retain < 0 or retain > 100: - raise ValueError('Value provided for `retain` must be a percent ' - 'in range [0, 100]. Provided: {}'.format(retain)) + for _ in range(int(itr)): + att = 0 + while att <= max_attempts: # while not rewired + while True: + e1, e2 = np.random.randint(k), np.random.randint(k) + while e1 == e2: + e2 = np.random.randint(k) + a, b = i[e1], j[e1] + c, d = i[e2], j[e2] - # get number of nodes in graph and invert weights (MINIMUM spanning tree) - nodes = len(network) - graph = np.triu(network * -1) + if a != c and a != d and b != c and b != d: + break # all 4 vertices must be different - # find MST and count # of edges in graph - mst = csgraph.minimum_spanning_tree(graph).todense() - mst_edges = np.sum(mst != 0) + # flip edge c-d with 50% probability + # to explore all potential rewirings + if np.random.random() > .5: + i[e2], j[e2] = d, c + c, d = d, c - # determine # of remaining edges and ensure we're not over the limit - remain = int((retain / 100) * ((nodes * (nodes - 1)) / 2)) - mst_edges - if remain < 0: - raise ValueError('Minimum spanning tree with {} edges exceeds desired ' - 'connection density of {}% ({} edges). Cannot ' - 'proceed with graph creation.' 
- .format(mst_edges, retain, remain + mst_edges)) + # rewiring condition + # not flipped + # a--b a b + # TO X + # c--d c d + # if flipped + # a--b a--b a b + # TO TO X + # c--d d--c d c + if not (W[a, d] or W[c, b]): + W[a, d] = W[a, b] + W[a, b] = 0 + W[d, a] = W[b, a] + W[b, a] = 0 + W[c, b] = W[c, d] + W[c, d] = 0 + W[b, c] = W[d, c] + W[d, c] = 0 - # zero out edges already in MST and then get indices of next best edges - graph -= mst - inds = utils.get_triu(graph).argsort()[:remain] - inds = tuple(e[inds] for e in np.triu_indices_from(graph, k=1)) + j[e1] = d + j[e2] = b # reassign edge indices + eff += 1 + break + att += 1 - # add edges to MST, symmetrize, and convert to binary matrix - mst[inds] = graph[inds] - mst = np.array((mst + mst.T) != 0, dtype=int) + return W, eff - return mst +if use_numba: + randmio_und = njit(randmio_und) def match_length_degree_distribution(W, D, nbins=10, nswap=1000, @@ -578,91 +277,6 @@ def match_length_degree_distribution(W, D, nbins=10, nswap=1000, return newB, newW, nr -def randmio_und(W, itr): - """ - Optimized version of randmio_und. - - This function randomizes an undirected network, while preserving the - degree distribution. The function does not preserve the strength - distribution in weighted networks. - - This function is significantly faster if numba is enabled, because - the main overhead is `np.random.randint`, see `here `_ - - Parameters - ---------- - W : (N, N) array-like - Undirected binary/weighted connection matrix - itr : int - rewiring parameter. Each edge is rewired approximately itr times. - - Returns - ------- - W : (N, N) array-like - Randomized network - eff : int - number of actual rewirings carried out - """ # noqa: E501 - W = W.copy() - n = len(W) - i, j = np.where(np.triu(W > 0, 1)) - k = len(i) - itr *= k - - # maximum number of rewiring attempts per iteration - max_attempts = np.round(n * k / (n * (n - 1))) - # actual number of successful rewirings - eff = 0 - - for _ in range(int(itr)): - att = 0 - while att <= max_attempts: # while not rewired - while True: - e1, e2 = np.random.randint(k), np.random.randint(k) - while e1 == e2: - e2 = np.random.randint(k) - a, b = i[e1], j[e1] - c, d = i[e2], j[e2] - - if a != c and a != d and b != c and b != d: - break # all 4 vertices must be different - - # flip edge c-d with 50% probability - # to explore all potential rewirings - if np.random.random() > .5: - i[e2], j[e2] = d, c - c, d = d, c - - # rewiring condition - # not flipped - # a--b a b - # TO X - # c--d c d - # if flipped - # a--b a--b a b - # TO TO X - # c--d d--c d c - if not (W[a, d] or W[c, b]): - W[a, d] = W[a, b] - W[a, b] = 0 - W[d, a] = W[b, a] - W[b, a] = 0 - W[c, b] = W[c, d] - W[c, d] = 0 - W[b, c] = W[d, c] - W[d, c] = 0 - - j[e1] = d - j[e2] = b # reassign edge indices - eff += 1 - break - att += 1 - - return W, eff - - -if use_numba: - randmio_und = njit(randmio_und) def strength_preserving_rand_sa(A, rewiring_iter=10, diff --git a/netneurotools/networks/tests/__init__.py b/netneurotools/networks/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/netneurotools/networks/tests/test_consensus.py b/netneurotools/networks/tests/test_consensus.py new file mode 100644 index 0000000..2c0fc23 --- /dev/null +++ b/netneurotools/networks/tests/test_consensus.py @@ -0,0 +1 @@ +"""For testing netneurotools.networks.consensus functionality.""" diff --git a/netneurotools/networks/tests/test_generative.py b/netneurotools/networks/tests/test_generative.py new file mode 100644 index 0000000..223d4bb 
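A toy, hedged demonstration of the degree-preserving rewiring now housed in ``randomize.py``; on a binary graph the degree sequence should be preserved exactly:

.. code:: python

    import numpy as np
    from netneurotools.networks import randmio_und

    rng = np.random.RandomState(1234)
    A = (rng.rand(30, 30) < 0.2).astype(float)
    A = np.triu(A, k=1)
    A = A + A.T                              # symmetric, zero diagonal

    W_rand, eff = randmio_und(A, itr=5)

    # rewiring swaps edges, so node degrees are unchanged
    assert np.array_equal(W_rand.sum(axis=0), A.sum(axis=0))
    print(eff, "successful rewirings")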
--- /dev/null +++ b/netneurotools/networks/tests/test_generative.py @@ -0,0 +1 @@ +"""For testing netneurotools.networks.generative functionality.""" diff --git a/netneurotools/networks/tests/test_networks_utils.py b/netneurotools/networks/tests/test_networks_utils.py new file mode 100644 index 0000000..cabd085 --- /dev/null +++ b/netneurotools/networks/tests/test_networks_utils.py @@ -0,0 +1,11 @@ +"""For testing netneurotools.networks.networks_utils functionality.""" + +import pytest +import numpy as np + +from netneurotools import networks + +def test_get_triu(): + arr = np.arange(9).reshape(3, 3) + assert np.all(networks.get_triu(arr) == np.array([1, 2, 5])) + assert np.all(networks.get_triu(arr, k=0) == np.array([0, 1, 2, 4, 5, 8])) \ No newline at end of file diff --git a/netneurotools/networks/tests/test_randomize.py b/netneurotools/networks/tests/test_randomize.py new file mode 100644 index 0000000..c2a4be2 --- /dev/null +++ b/netneurotools/networks/tests/test_randomize.py @@ -0,0 +1 @@ +"""For testing netneurotools.networks.randomize functionality.""" diff --git a/netneurotools/plotting/__init__.py b/netneurotools/plotting/__init__.py new file mode 100644 index 0000000..a55f49c --- /dev/null +++ b/netneurotools/plotting/__init__.py @@ -0,0 +1,34 @@ +"""Functions for making pretty plots and whatnot.""" + + +from .pysurfer_plotters import ( + plot_conte69, plot_fslr, plot_fsaverage, plot_fsvertex +) + + +from .pyvista_plotters import ( + pv_plot_surface +) + + +from .mpl_plotters import ( + _grid_communities, _sort_communities, + plot_point_brain, plot_mod_heatmap, +) + + +from .color_utils import ( + available_cmaps +) + +__all__ = [ + # pysurfer_plotters + 'plot_conte69', 'plot_fslr', 'plot_fsaverage', 'plot_fsvertex', + # pyvista_plotters + 'pv_plot_surface', + # mpl_plotters + '_grid_communities', '_sort_communities', + 'plot_point_brain', 'plot_mod_heatmap' , + # color_utils + 'available_cmaps' +] diff --git a/netneurotools/colors.py b/netneurotools/plotting/color_utils.py similarity index 98% rename from netneurotools/colors.py rename to netneurotools/plotting/color_utils.py index cf0b7d8..692baf8 100644 --- a/netneurotools/colors.py +++ b/netneurotools/plotting/color_utils.py @@ -1,5 +1,4 @@ -# -*- coding: utf-8 -*- -"""Useful colormaps.""" +"""Functions for working with colors and colormaps.""" from matplotlib.colors import LinearSegmentedColormap, ListedColormap diff --git a/netneurotools/plotting/mpl_plotters.py b/netneurotools/plotting/mpl_plotters.py new file mode 100644 index 0000000..2f79b94 --- /dev/null +++ b/netneurotools/plotting/mpl_plotters.py @@ -0,0 +1,289 @@ +"""Functions for matplotlib-based plotting.""" + +from typing import Iterable +import numpy as np +import matplotlib.pyplot as plt +import matplotlib.patches as mpatches + + +def _grid_communities(communities): + """ + Generate boundaries of `communities`. + + Parameters + ---------- + communities : array_like + Community assignment vector + + Returns + ------- + bounds : list + Boundaries of communities + """ + communities = np.asarray(communities) + if 0 in communities: + communities = communities + 1 + + comm = communities[np.argsort(communities)] + bounds = [] + for i in np.unique(comm): + ind = np.where(comm == i) + if len(ind) > 0: + bounds.append(np.min(ind)) + + bounds.append(len(communities)) + + return bounds + + +def _sort_communities(consensus, communities): + """ + Sort `communities` in `consensus` according to strength. 
+ + Parameters + ---------- + consensus : array_like + Correlation matrix + communities : array_like + Community assignments for `consensus` + + Returns + ------- + inds : np.ndarray + Index array for sorting `consensus` + """ + communities = np.asarray(communities) + if 0 in communities: + communities = communities + 1 + + bounds = _grid_communities(communities) + inds = np.argsort(communities) + + for n, f in enumerate(bounds[:-1]): + i = inds[f:bounds[n + 1]] + cco = i[consensus[np.ix_(i, i)].mean(axis=1).argsort()[::-1]] + inds[f:bounds[n + 1]] = cco + + return inds + + +def plot_mod_heatmap(data, communities, *, inds=None, edgecolor='black', + ax=None, figsize=(6.4, 4.8), xlabels=None, ylabels=None, + xlabelrotation=90, ylabelrotation=0, cbar=True, + square=True, xticklabels=None, yticklabels=None, + mask_diagonal=True, **kwargs): + """ + Plot `data` as heatmap with borders drawn around `communities`. + + Parameters + ---------- + data : (N, N) array_like + Correlation matrix + communities : (N,) array_like + Community assignments for `data` + inds : (N,) array_like, optional + Index array for sorting `data` within `communities`. If None, these + will be generated from `data`. Default: None + edgecolor : str, optional + Color for lines demarcating community boundaries. Default: 'black' + ax : matplotlib.axes.Axes, optional + Axis on which to plot the heatmap. If none provided, a new figure and + axis will be created. Default: None + figsize : tuple, optional + Size of figure to create if `ax` is not provided. Default: (20, 20) + {x,y}labels : list, optional + List of labels on {x,y}-axis for each community in `communities`. The + number of labels should match the number of unique communities. + Default: None + {x,y}labelrotation : float, optional + Angle of the rotation of the labels. Available only if `{x,y}labels` + provided. Default : xlabelrotation: 90, ylabelrotation: 0 + square : bool, optional + Setting the matrix with equal aspect. Default: True + {x,y}ticklabels : list, optional + Incompatible with `{x,y}labels`. List of labels for each entry (not + community) in `data`. Default: None + cbar : bool, optional + Whether to plot colorbar. Default: True + mask_diagonal : bool, optional + Whether to mask the diagonal in the plotted heatmap. 
Default: True + kwargs : key-value mapping + Keyword arguments for `plt.pcolormesh()` + + Returns + ------- + ax : matplotlib.axes.Axes + Axis object containing plot + """ + for t, label in zip([xticklabels, yticklabels], [xlabels, ylabels]): + if t is not None and label is not None: + raise ValueError('Cannot set both {x,y}labels and {x,y}ticklabels') + + # get indices for sorting consensus + if inds is None: + inds = _sort_communities(data, communities) + + if ax is None: + fig, ax = plt.subplots(1, 1, figsize=figsize) + + # plot data re-ordered based on community and node strength + if mask_diagonal: + plot_data = np.ma.masked_where(np.eye(len(data)), + data[np.ix_(inds, inds)]) + else: + plot_data = data[np.ix_(inds, inds)] + + coll = ax.pcolormesh(plot_data, edgecolor='none', **kwargs) + ax.set(xlim=(0, plot_data.shape[1]), ylim=(0, plot_data.shape[0])) + + # set equal aspect + if square: + ax.set_aspect('equal') + + for side in ['top', 'right', 'left', 'bottom']: + ax.spines[side].set_visible(False) + + # invert the y-axis so it looks "as expected" + ax.invert_yaxis() + + # plot the colorbar + if cbar: + cb = ax.figure.colorbar(coll) + if kwargs.get('rasterized', False): + cb.solids.set_rasterized(True) + + # draw borders around communities + bounds = _grid_communities(communities) + bounds[0] += 0.2 + bounds[-1] -= 0.2 + for n, edge in enumerate(np.diff(bounds)): + ax.add_patch(mpatches.Rectangle((bounds[n], bounds[n]), + edge, edge, fill=False, linewidth=2, + edgecolor=edgecolor)) + + if xlabels is not None or ylabels is not None: + # find the tick locations + initloc = _grid_communities(communities) + tickloc = [] + for loc in range(len(initloc) - 1): + tickloc.append(np.mean((initloc[loc], initloc[loc + 1]))) + + if xlabels is not None: + # make sure number of labels match the number of ticks + if len(tickloc) != len(xlabels): + raise ValueError('Number of labels do not match the number of ' + 'unique communities.') + else: + ax.set_xticks(tickloc) + ax.set_xticklabels(labels=xlabels, rotation=xlabelrotation) + ax.tick_params(left=False, bottom=False) + if ylabels is not None: + # make sure number of labels match the number of ticks + if len(tickloc) != len(ylabels): + raise ValueError('Number of labels do not match the number of ' + 'unique communities.') + else: + ax.set_yticks(tickloc) + ax.set_yticklabels(labels=ylabels, rotation=ylabelrotation) + ax.tick_params(left=False, bottom=False) + + if xticklabels is not None: + labels_ind = [xticklabels[i] for i in inds] + ax.set_xticks(np.arange(len(labels_ind)) + 0.5) + ax.set_xticklabels(labels_ind, rotation=90) + if yticklabels is not None: + labels_ind = [yticklabels[i] for i in inds] + ax.set_yticks(np.arange(len(labels_ind)) + 0.5) + ax.set_yticklabels(labels_ind) + + return ax + + + + +def plot_point_brain(data, coords, views=None, views_orientation='vertical', + views_size=(4, 2.4), cbar=False, robust=True, size=50, + **kwargs): + """ + Plot `data` as a cloud of points in 3D space based on specified `coords`. + + Parameters + ---------- + data : (N,) array_like + Data for an `N` node parcellation; determines color of points + coords : (N, 3) array_like + x, y, z coordinates for `N` node parcellation + views : list, optional + List specifying which views to use. Can be any of {'sagittal', 'sag', + 'coronal', 'cor', 'axial', 'ax'}. If not specified will use 'sagittal' + and 'axial'. Default: None + views_orientation: str, optional + Orientation of the views. Can be either 'vertical' or 'horizontal'. + Default: 'vertical'. 
+ views_size : tuple, optional + Figure size of each view. Default: (4, 2.4) + cbar : bool, optional + Whether to also show colorbar. Default: False + robust : bool, optional + Whether to use robust calculation of `vmin` and `vmax` for color scale. + size : int, optional + Size of points on plot. Default: 50 + **kwargs + Key-value pairs passed to `matplotlib.axes.Axis.scatter` + + Returns + ------- + fig : :class:`matplotlib.figure.Figure` + """ + _views = dict(sagittal=(0, 180), sag=(0, 180), + axial=(90, 180), ax=(90, 180), + coronal=(0, 90), cor=(0, 90)) + + x, y, z = coords[:, 0], coords[:, 1], coords[:, 2] + + if views is None: + views = [_views[f] for f in ['sagittal', 'axial']] + else: + if not isinstance(views, Iterable) or isinstance(views, str): + views = [views] + views = [_views[f] for f in views] + + if views_orientation == 'vertical': + ncols, nrows = 1, len(views) + elif views_orientation == 'horizontal': + ncols, nrows = len(views), 1 + figsize = (ncols * views_size[0], nrows * views_size[1]) + + # create figure and axes (3d projections) + fig, axes = plt.subplots(ncols=ncols, nrows=nrows, + figsize=figsize, + subplot_kw=dict(projection='3d')) + + opts = dict(linewidth=0.5, edgecolor='gray', cmap='viridis') + if robust: + vmin, vmax = np.percentile(data, [2.5, 97.5]) + opts.update(dict(vmin=vmin, vmax=vmax)) + opts.update(kwargs) + + # iterate through saggital/axial views and plot, rotating as needed + for n, view in enumerate(views): + # if only one view then axes is not a list! + ax = axes[n] if len(views) > 1 else axes + # make the actual scatterplot and update the view / aspect ratios + col = ax.scatter(x, y, z, c=data, s=size, **opts) + ax.view_init(*view) + ax.axis('off') + scaling = np.array([ax.get_xlim(), + ax.get_ylim(), + ax.get_zlim()]) + ax.set_box_aspect(tuple(scaling[:, 1] - scaling[:, 0])) + + fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0, wspace=0) + + # add colorbar to axes + if cbar: + cbar = fig.colorbar(col, ax=axes.flatten(), + drawedges=False, shrink=0.7) + cbar.outline.set_linewidth(0) + + return fig diff --git a/netneurotools/plotting.py b/netneurotools/plotting/pysurfer_plotters.py similarity index 61% rename from netneurotools/plotting.py rename to netneurotools/plotting/pysurfer_plotters.py index 3886547..4dd88b6 100644 --- a/netneurotools/plotting.py +++ b/netneurotools/plotting/pysurfer_plotters.py @@ -1,210 +1,10 @@ -# -*- coding: utf-8 -*- -"""Functions for making pretty plots and whatnot.""" +"""Functions for pysurfer-based plotting.""" import os -from typing import Iterable - -import matplotlib.patches as patches -import matplotlib.pyplot as plt -from mpl_toolkits.mplot3d import Axes3D # noqa -import nibabel as nib import numpy as np +import nibabel as nib -from .freesurfer import FSIGNORE, _decode_list - - -def _grid_communities(communities): - """ - Generate boundaries of `communities`. - - Parameters - ---------- - communities : array_like - Community assignment vector - - Returns - ------- - bounds : list - Boundaries of communities - """ - communities = np.asarray(communities) - if 0 in communities: - communities = communities + 1 - - comm = communities[np.argsort(communities)] - bounds = [] - for i in np.unique(comm): - ind = np.where(comm == i) - if len(ind) > 0: - bounds.append(np.min(ind)) - - bounds.append(len(communities)) - - return bounds - - -def sort_communities(consensus, communities): - """ - Sort `communities` in `consensus` according to strength. 
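For orientation, a quick sketch of the two matplotlib entry points collected in the new `mpl_plotters.py` above, using random data (illustrative only):

.. code:: python

    import numpy as np
    from netneurotools.plotting import plot_mod_heatmap, plot_point_brain

    rng = np.random.default_rng(42)

    corr = rng.random((90, 90))
    corr = (corr + corr.T) / 2                 # symmetric "connectivity" matrix
    comms = rng.integers(1, 5, size=90)        # four community labels

    ax = plot_mod_heatmap(corr, comms, cmap='viridis')

    coords = rng.normal(scale=30, size=(90, 3))    # fake centroid coordinates
    fig = plot_point_brain(corr.mean(axis=0), coords,
                           views=['sag', 'ax'], cbar=True)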
- - Parameters - ---------- - consensus : array_like - Correlation matrix - communities : array_like - Community assignments for `consensus` - - Returns - ------- - inds : np.ndarray - Index array for sorting `consensus` - """ - communities = np.asarray(communities) - if 0 in communities: - communities = communities + 1 - - bounds = _grid_communities(communities) - inds = np.argsort(communities) - - for n, f in enumerate(bounds[:-1]): - i = inds[f:bounds[n + 1]] - cco = i[consensus[np.ix_(i, i)].mean(axis=1).argsort()[::-1]] - inds[f:bounds[n + 1]] = cco - - return inds - - -def plot_mod_heatmap(data, communities, *, inds=None, edgecolor='black', - ax=None, figsize=(6.4, 4.8), xlabels=None, ylabels=None, - xlabelrotation=90, ylabelrotation=0, cbar=True, - square=True, xticklabels=None, yticklabels=None, - mask_diagonal=True, **kwargs): - """ - Plot `data` as heatmap with borders drawn around `communities`. - - Parameters - ---------- - data : (N, N) array_like - Correlation matrix - communities : (N,) array_like - Community assignments for `data` - inds : (N,) array_like, optional - Index array for sorting `data` within `communities`. If None, these - will be generated from `data`. Default: None - edgecolor : str, optional - Color for lines demarcating community boundaries. Default: 'black' - ax : matplotlib.axes.Axes, optional - Axis on which to plot the heatmap. If none provided, a new figure and - axis will be created. Default: None - figsize : tuple, optional - Size of figure to create if `ax` is not provided. Default: (20, 20) - {x,y}labels : list, optional - List of labels on {x,y}-axis for each community in `communities`. The - number of labels should match the number of unique communities. - Default: None - {x,y}labelrotation : float, optional - Angle of the rotation of the labels. Available only if `{x,y}labels` - provided. Default : xlabelrotation: 90, ylabelrotation: 0 - square : bool, optional - Setting the matrix with equal aspect. Default: True - {x,y}ticklabels : list, optional - Incompatible with `{x,y}labels`. List of labels for each entry (not - community) in `data`. Default: None - cbar : bool, optional - Whether to plot colorbar. Default: True - mask_diagonal : bool, optional - Whether to mask the diagonal in the plotted heatmap. 
Default: True - kwargs : key-value mapping - Keyword arguments for `plt.pcolormesh()` - - Returns - ------- - ax : matplotlib.axes.Axes - Axis object containing plot - """ - for t, label in zip([xticklabels, yticklabels], [xlabels, ylabels]): - if t is not None and label is not None: - raise ValueError('Cannot set both {x,y}labels and {x,y}ticklabels') - - # get indices for sorting consensus - if inds is None: - inds = sort_communities(data, communities) - - if ax is None: - fig, ax = plt.subplots(1, 1, figsize=figsize) - - # plot data re-ordered based on community and node strength - if mask_diagonal: - plot_data = np.ma.masked_where(np.eye(len(data)), - data[np.ix_(inds, inds)]) - else: - plot_data = data[np.ix_(inds, inds)] - - coll = ax.pcolormesh(plot_data, edgecolor='none', **kwargs) - ax.set(xlim=(0, plot_data.shape[1]), ylim=(0, plot_data.shape[0])) - - # set equal aspect - if square: - ax.set_aspect('equal') - - for side in ['top', 'right', 'left', 'bottom']: - ax.spines[side].set_visible(False) - - # invert the y-axis so it looks "as expected" - ax.invert_yaxis() - - # plot the colorbar - if cbar: - cb = ax.figure.colorbar(coll) - if kwargs.get('rasterized', False): - cb.solids.set_rasterized(True) - - # draw borders around communities - bounds = _grid_communities(communities) - bounds[0] += 0.2 - bounds[-1] -= 0.2 - for n, edge in enumerate(np.diff(bounds)): - ax.add_patch(patches.Rectangle((bounds[n], bounds[n]), - edge, edge, fill=False, linewidth=2, - edgecolor=edgecolor)) - - if xlabels is not None or ylabels is not None: - # find the tick locations - initloc = _grid_communities(communities) - tickloc = [] - for loc in range(len(initloc) - 1): - tickloc.append(np.mean((initloc[loc], initloc[loc + 1]))) - - if xlabels is not None: - # make sure number of labels match the number of ticks - if len(tickloc) != len(xlabels): - raise ValueError('Number of labels do not match the number of ' - 'unique communities.') - else: - ax.set_xticks(tickloc) - ax.set_xticklabels(labels=xlabels, rotation=xlabelrotation) - ax.tick_params(left=False, bottom=False) - if ylabels is not None: - # make sure number of labels match the number of ticks - if len(tickloc) != len(ylabels): - raise ValueError('Number of labels do not match the number of ' - 'unique communities.') - else: - ax.set_yticks(tickloc) - ax.set_yticklabels(labels=ylabels, rotation=ylabelrotation) - ax.tick_params(left=False, bottom=False) - - if xticklabels is not None: - labels_ind = [xticklabels[i] for i in inds] - ax.set_xticks(np.arange(len(labels_ind)) + 0.5) - ax.set_xticklabels(labels_ind, rotation=90) - if yticklabels is not None: - labels_ind = [yticklabels[i] for i in inds] - ax.set_yticks(np.arange(len(labels_ind)) + 0.5) - ax.set_yticklabels(labels_ind) - - return ax - +from ..datasets import FREESURFER_IGNORE, _get_freesurfer_subjid def plot_conte69(data, lhlabel, rhlabel, surf='midthickness', vmin=None, vmax=None, colormap='viridis', @@ -319,7 +119,7 @@ def plot_fslr(data, lhlabel, rhlabel, surf_atlas='conte69', scene : mayavi.Scene Scene object containing plot """ - from .datasets import fetch_conte69, fetch_yerkes19 + from ..datasets import fetch_conte69, fetch_yerkes19 try: from mayavi import mlab except ImportError: @@ -388,42 +188,7 @@ def plot_fslr(data, lhlabel, rhlabel, surf_atlas='conte69', return lhplot, rhplot -def _get_fs_subjid(subject_id, subjects_dir=None): - """ - Get fsaverage version `subject_id`, fetching if required. 
- Parameters - ---------- - subject_id : str - FreeSurfer subject ID - subjects_dir : str, optional - Path to FreeSurfer subject directory. If not set, will inherit from - the environmental variable $SUBJECTS_DIR. Default: None - - Returns - ------- - subject_id : str - FreeSurfer subject ID - subjects_dir : str - Path to subject directory with `subject_id` - """ - from netneurotools.utils import check_fs_subjid - - # check for FreeSurfer install w/fsaverage; otherwise, fetch required - try: - subject_id, subjects_dir = check_fs_subjid(subject_id, subjects_dir) - except FileNotFoundError: - if 'fsaverage' not in subject_id: - raise ValueError('Provided subject {} does not exist in provided ' - 'subjects_dir {}' - .format(subject_id, subjects_dir)) from None - from netneurotools.datasets import fetch_fsaverage - from netneurotools.datasets.utils import _get_data_dir - fetch_fsaverage(subject_id) - subjects_dir = os.path.join(_get_data_dir(), 'tpl-fsaverage') - subject_id, subjects_dir = check_fs_subjid(subject_id, subjects_dir) - - return subject_id, subjects_dir def plot_fsaverage(data, *, lhannot, rhannot, order='lr', mask=None, @@ -503,7 +268,11 @@ def plot_fsaverage(data, *, lhannot, rhannot, order='lr', mask=None, ... rhannot=schaefer.rh) # doctest: +SKIP """ - subject_id, subjects_dir = _get_fs_subjid(subject_id, subjects_dir) + def _decode_list(vals): + """List decoder.""" + return [val.decode() if hasattr(val, 'decode') else val for val in vals] + + subject_id, subjects_dir = _get_freesurfer_subjid(subject_id, subjects_dir) # cast data to float (required for NaNs) data = np.asarray(data, dtype='float') @@ -521,7 +290,7 @@ def plot_fsaverage(data, *, lhannot, rhannot, order='lr', mask=None, vmax = np.nanpercentile(data, 97.5) # parcels that should not be included in parcellation - drop = FSIGNORE.copy() + drop = FREESURFER_IGNORE.copy() if noplot is not None: if isinstance(noplot, str): noplot = [noplot] @@ -637,7 +406,7 @@ def plot_fsvertex(data, *, order='lr', surf='pial', views='lat', raise ImportError('Cannot use plot_fsaverage() if pysurfer is not ' 'installed. Please install pysurfer and try again.') from None - subject_id, subjects_dir = _get_fs_subjid(subject_id, subjects_dir) + subject_id, subjects_dir = _get_freesurfer_subjid(subject_id, subjects_dir) # cast data to float (required for NaNs) data = np.asarray(data, dtype='float') @@ -711,92 +480,4 @@ def plot_fsvertex(data, *, order='lr', surf='pial', views='lat', cm.scalar_bar.number_of_labels = num_labels surf[n].render() - return brain - - -def plot_point_brain(data, coords, views=None, views_orientation='vertical', - views_size=(4, 2.4), cbar=False, robust=True, size=50, - **kwargs): - """ - Plot `data` as a cloud of points in 3D space based on specified `coords`. - - Parameters - ---------- - data : (N,) array_like - Data for an `N` node parcellation; determines color of points - coords : (N, 3) array_like - x, y, z coordinates for `N` node parcellation - views : list, optional - List specifying which views to use. Can be any of {'sagittal', 'sag', - 'coronal', 'cor', 'axial', 'ax'}. If not specified will use 'sagittal' - and 'axial'. Default: None - views_orientation: str, optional - Orientation of the views. Can be either 'vertical' or 'horizontal'. - Default: 'vertical'. - views_size : tuple, optional - Figure size of each view. Default: (4, 2.4) - cbar : bool, optional - Whether to also show colorbar. Default: False - robust : bool, optional - Whether to use robust calculation of `vmin` and `vmax` for color scale. 
- size : int, optional - Size of points on plot. Default: 50 - **kwargs - Key-value pairs passed to `matplotlib.axes.Axis.scatter` - - Returns - ------- - fig : :class:`matplotlib.figure.Figure` - """ - _views = dict(sagittal=(0, 180), sag=(0, 180), - axial=(90, 180), ax=(90, 180), - coronal=(0, 90), cor=(0, 90)) - - x, y, z = coords[:, 0], coords[:, 1], coords[:, 2] - - if views is None: - views = [_views[f] for f in ['sagittal', 'axial']] - else: - if not isinstance(views, Iterable) or isinstance(views, str): - views = [views] - views = [_views[f] for f in views] - - if views_orientation == 'vertical': - ncols, nrows = 1, len(views) - elif views_orientation == 'horizontal': - ncols, nrows = len(views), 1 - figsize = (ncols * views_size[0], nrows * views_size[1]) - - # create figure and axes (3d projections) - fig, axes = plt.subplots(ncols=ncols, nrows=nrows, - figsize=figsize, - subplot_kw=dict(projection='3d')) - - opts = dict(linewidth=0.5, edgecolor='gray', cmap='viridis') - if robust: - vmin, vmax = np.percentile(data, [2.5, 97.5]) - opts.update(dict(vmin=vmin, vmax=vmax)) - opts.update(kwargs) - - # iterate through saggital/axial views and plot, rotating as needed - for n, view in enumerate(views): - # if only one view then axes is not a list! - ax = axes[n] if len(views) > 1 else axes - # make the actual scatterplot and update the view / aspect ratios - col = ax.scatter(x, y, z, c=data, s=size, **opts) - ax.view_init(*view) - ax.axis('off') - scaling = np.array([ax.get_xlim(), - ax.get_ylim(), - ax.get_zlim()]) - ax.set_box_aspect(tuple(scaling[:, 1] - scaling[:, 0])) - - fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0, wspace=0) - - # add colorbar to axes - if cbar: - cbar = fig.colorbar(col, ax=axes.flatten(), - drawedges=False, shrink=0.7) - cbar.outline.set_linewidth(0) - - return fig + return brain \ No newline at end of file diff --git a/netneurotools/plotting/pyvista_plotters.py b/netneurotools/plotting/pyvista_plotters.py new file mode 100644 index 0000000..fc98223 --- /dev/null +++ b/netneurotools/plotting/pyvista_plotters.py @@ -0,0 +1,5 @@ +"""Functions for pyvista-based plotting.""" + +def pv_plot_surface(): + """Plot a surface using PyVista.""" + pass \ No newline at end of file diff --git a/netneurotools/plotting/tests/__init__.py b/netneurotools/plotting/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/netneurotools/plotting/tests/test_colorutils.py b/netneurotools/plotting/tests/test_colorutils.py new file mode 100644 index 0000000..cbf03e7 --- /dev/null +++ b/netneurotools/plotting/tests/test_colorutils.py @@ -0,0 +1 @@ +"""For testing netneurotools.plotting.color_utils functionality.""" diff --git a/netneurotools/plotting/tests/test_mpl.py b/netneurotools/plotting/tests/test_mpl.py new file mode 100644 index 0000000..71141a2 --- /dev/null +++ b/netneurotools/plotting/tests/test_mpl.py @@ -0,0 +1,36 @@ +"""For testing netneurotools.plotting.mpl_plotters functionality.""" + +import numpy as np +import matplotlib.pyplot as plt +from netneurotools import plotting + +def test_grid_communities(): + """Test _grid_communities function.""" + comms = np.asarray([0, 0, 0, 0, 1, 1, 1, 1, 2, 2]) + # check that comms with / without 0 community label yields same output + assert np.allclose(plotting._grid_communities(comms), [0, 4, 8, 10]) + assert np.allclose(plotting._grid_communities(comms + 1), [0, 4, 8, 10]) + + +def test_sort_communities(): + """Test sort_communities function.""" + data = np.arange(9).reshape(3, 3) + comms = 
np.asarray([0, 0, 2]) + # check that comms with / without 0 community label yields same output + assert np.allclose(plotting._sort_communities(data, comms), [1, 0, 2]) + assert np.allclose(plotting._sort_communities(data, comms + 1), [1, 0, 2]) + + +def test_plot_mod_heatmap(): + """Test plot_mod_heatmap function.""" + data = np.random.rand(100, 100) + comms = np.random.choice(4, size=(100,)) + ax = plotting.plot_mod_heatmap(data, comms) + assert isinstance(ax, plt.Axes) + +def test_plot_point_brain(): + """Test plot_point_brain function.""" + data = np.random.rand(100) + coords = np.random.rand(100, 3) + out = plotting.plot_point_brain(data, coords) + assert isinstance(out, plt.Figure) diff --git a/netneurotools/plotting/tests/test_pysurfer.py b/netneurotools/plotting/tests/test_pysurfer.py new file mode 100644 index 0000000..3edb943 --- /dev/null +++ b/netneurotools/plotting/tests/test_pysurfer.py @@ -0,0 +1,27 @@ +"""For testing netneurotools.plotting.pysurfer_plotters functionality.""" + +import pytest +import numpy as np +from netneurotools import datasets, plotting + +@pytest.mark.filterwarnings('ignore') +def test_plot_fsvertex(): + """Test plotting on a freesurfer vertex.""" + surfer = pytest.importorskip('surfer') + + data = np.random.rand(20484) + brain = plotting.plot_fsvertex(data, subject_id='fsaverage5', + offscreen=True) + assert isinstance(brain, surfer.Brain) + + +@pytest.mark.filterwarnings('ignore') +def test_plot_fsaverage(): + """Test plotting on a freesurfer average brain.""" + surfer = pytest.importorskip('surfer') + + data = np.random.rand(68) + lhannot, rhannot = datasets.fetch_cammoun2012('fsaverage5')['scale033'] + brain = plotting.plot_fsaverage(data, lhannot=lhannot, rhannot=rhannot, + subject_id='fsaverage5', offscreen=True) + assert isinstance(brain, surfer.Brain) \ No newline at end of file diff --git a/netneurotools/plotting/tests/test_pyvista.py b/netneurotools/plotting/tests/test_pyvista.py new file mode 100644 index 0000000..0b87931 --- /dev/null +++ b/netneurotools/plotting/tests/test_pyvista.py @@ -0,0 +1 @@ +"""For testing netneurotools.plotting.pyvista_plotters functionality.""" diff --git a/netneurotools/spatial/__init__.py b/netneurotools/spatial/__init__.py new file mode 100644 index 0000000..a958655 --- /dev/null +++ b/netneurotools/spatial/__init__.py @@ -0,0 +1,12 @@ +"""Functions for handling spatial brain data.""" + + +from .spatial_stats import ( + morans_i, local_morans_i +) + + +__all__ = [ + # spatial_stats + 'morans_i', 'local_morans_i' +] diff --git a/netneurotools/spatial/gaussian_random_field.py b/netneurotools/spatial/gaussian_random_field.py new file mode 100644 index 0000000..7b40565 --- /dev/null +++ b/netneurotools/spatial/gaussian_random_field.py @@ -0,0 +1 @@ +"""Functions for working with Gaussian random fields.""" diff --git a/netneurotools/spatial/spatial_stats.py b/netneurotools/spatial/spatial_stats.py new file mode 100644 index 0000000..1552b04 --- /dev/null +++ b/netneurotools/spatial/spatial_stats.py @@ -0,0 +1,10 @@ +"""Functions for calculating spatial statistics.""" + + +def morans_i(): + """Calculate Moran's I for spatial autocorrelation.""" + pass + +def local_morans_i(): + """Calculate local Moran's I for spatial autocorrelation.""" + pass \ No newline at end of file diff --git a/netneurotools/spatial/tests/__init__.py b/netneurotools/spatial/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/netneurotools/spatial/tests/test_grf.py b/netneurotools/spatial/tests/test_grf.py new file mode 
100644 index 0000000..962bdb9 --- /dev/null +++ b/netneurotools/spatial/tests/test_grf.py @@ -0,0 +1 @@ +"""For testing netneurotools.spatial.gaussian_random_field functionality.""" diff --git a/netneurotools/spatial/tests/test_spatialstats.py b/netneurotools/spatial/tests/test_spatialstats.py new file mode 100644 index 0000000..fa9c7f6 --- /dev/null +++ b/netneurotools/spatial/tests/test_spatialstats.py @@ -0,0 +1 @@ +"""For testing netneurotools.spatial.spatial_stats functionality.""" diff --git a/netneurotools/stats.py b/netneurotools/stats.py deleted file mode 100644 index 952995b..0000000 --- a/netneurotools/stats.py +++ /dev/null @@ -1,1593 +0,0 @@ -# -*- coding: utf-8 -*- -"""Functions for performing statistical preprocessing and analyses.""" - -import warnings - -import numpy as np -from tqdm import tqdm -from itertools import combinations -from scipy import optimize, spatial, special, stats as sstats -try: # scipy >= 1.8.0 - from scipy.stats._stats_py import _chk2_asarray -except ImportError: # scipy < 1.8.0 - from scipy.stats.stats import _chk2_asarray -from sklearn.utils.validation import check_random_state -from sklearn.linear_model import LinearRegression -from joblib import Parallel, delayed - - -from . import utils -from .metrics import _graph_laplacian - -try: - from numba import njit - use_numba = True -except ImportError: - use_numba = False - - -def residualize(X, Y, Xc=None, Yc=None, normalize=True, add_intercept=True): - """ - Return residuals of regression equation from `Y ~ X`. - - Parameters - ---------- - X : (N[, R]) array_like - Coefficient matrix of `R` variables for `N` subjects - Y : (N[, F]) array_like - Dependent variable matrix of `F` variables for `N` subjects - Xc : (M[, R]) array_like, optional - Coefficient matrix of `R` variables for `M` subjects. If not specified - then `X` is used to estimate betas. Default: None - Yc : (M[, F]) array_like, optional - Dependent variable matrix of `F` variables for `M` subjects. If not - specified then `Y` is used to estimate betas. Default: None - normalize : bool, optional - Whether to normalize (i.e., z-score) residuals. Will use residuals from - `Yc ~ Xc` for generating mean and variance. Default: True - add_intercept : bool, optional - Whether to add intercept to `X` (and `Xc`, if provided). The intercept - will not be removed, just used in beta estimation. Default: True - - Returns - ------- - Yr : (N, F) numpy.ndarray - Residuals of `Y ~ X` - - Notes - ----- - If both `Xc` and `Yc` are provided, these are used to calculate betas which - are then applied to `X` and `Y`. - """ - if ((Yc is None and Xc is not None) or (Yc is not None and Xc is None)): - raise ValueError('If processing against a comparative group, you must ' - 'provide both `Xc` and `Yc`.') - - X, Y = np.asarray(X), np.asarray(Y) - - if Yc is None: - Xc, Yc = X.copy(), Y.copy() - else: - Xc, Yc = np.asarray(Xc), np.asarray(Yc) - - # add intercept to regressors if requested and calculate fit - if add_intercept: - X, Xc = utils.add_constant(X), utils.add_constant(Xc) - betas, *rest = np.linalg.lstsq(Xc, Yc, rcond=None) - - # remove intercept from regressors and betas for calculation of residuals - if add_intercept: - betas = betas[:-1] - X, Xc = X[:, :-1], Xc[:, :-1] - - # calculate residuals - Yr = Y - (X @ betas) - Ycr = Yc - (Xc @ betas) - - if normalize: - Yr = sstats.zmap(Yr, compare=Ycr) - - return Yr - - -def get_mad_outliers(data, thresh=3.5): - """ - Determine which samples in `data` are outliers. 
- - Uses the Median Absolute Deviation for determining whether datapoints are - outliers - - Parameters - ---------- - data : (N, M) array_like - Data array where `N` is samples and `M` is features - thresh : float, optional - Modified z-score. Observations with a modified z-score (based on the - median absolute deviation) greater than this value will be classified - as outliers. Default: 3.5 - - Returns - ------- - outliers : (N,) numpy.ndarray - Boolean array where True indicates an outlier - - Notes - ----- - Taken directly from https://stackoverflow.com/a/22357811 - - References - ---------- - Boris Iglewicz and David Hoaglin (1993), "Volume 16: How to Detect and - Handle Outliers", The ASQC Basic References in Quality Control: Statistical - Techniques, Edward F. Mykytka, Ph.D., Editor. - - Examples - -------- - >>> from netneurotools import stats - - Create array with three samples of four features each: - - >>> X = np.array([[0, 5, 10, 15], [1, 4, 11, 16], [100, 100, 100, 100]]) - >>> X - array([[ 0, 5, 10, 15], - [ 1, 4, 11, 16], - [100, 100, 100, 100]]) - - Determine which sample(s) is outlier: - - >>> outliers = stats.get_mad_outliers(X) - >>> outliers - array([False, False, True]) - """ - data = np.asarray(data) - - if data.ndim == 1: - data = np.vstack(data) - if data.ndim > 2: - data = data.reshape(len(data), -1) - - median = np.nanmedian(data, axis=0) - diff = np.nansum((data - median)**2, axis=-1) - diff = np.sqrt(diff) - med_abs_deviation = np.median(diff) - - modified_z_score = 0.6745 * diff / med_abs_deviation - - return modified_z_score > thresh - - -def permtest_1samp(a, popmean, axis=0, n_perm=1000, seed=0): - """ - Non-parametric equivalent of :py:func:`scipy.stats.ttest_1samp`. - - Generates two-tailed p-value for hypothesis of whether `a` differs from - `popmean` using permutation tests - - Parameters - ---------- - a : array_like - Sample observations - popmean : float or array_like - Expected valued in null hypothesis. If array_like then it must have the - same shape as `a` excluding the `axis` dimension - axis : int or None, optional - Axis along which to compute test. If None, compute over the whole array - of `a`. Default: 0 - n_perm : int, optional - Number of permutations to assess. Unless `a` is very small along `axis` - this will approximate a randomization test via Monte Carlo simulations. - Default: 1000 - seed : {int, np.random.RandomState instance, None}, optional - Seed for random number generation. Set to None for "randomness". - Default: 0 - - Returns - ------- - stat : float or numpy.ndarray - Difference from `popmean` - pvalue : float or numpy.ndarray - Non-parametric p-value - - Notes - ----- - Providing multiple values to `popmean` to run *independent* tests in - parallel is not currently supported. - - The lowest p-value that can be returned by this function is equal to 1 / - (`n_perm` + 1). - - Examples - -------- - >>> from netneurotools import stats - >>> np.random.seed(7654567) # set random seed for reproducible results - >>> rvs = np.random.normal(loc=5, scale=10, size=(50, 2)) - - Test if mean of random sample is equal to true mean, and different mean. We - reject the null hypothesis in the second case and don't reject it in the - first case. 
- - >>> stats.permtest_1samp(rvs, 5.0) - (array([-0.985602 , -0.05204969]), array([0.48551449, 0.95904096])) - >>> stats.permtest_1samp(rvs, 0.0) - (array([4.014398 , 4.94795031]), array([0.00699301, 0.000999 ])) - - Example using axis and non-scalar dimension for population mean - - >>> stats.permtest_1samp(rvs, [5.0, 0.0]) - (array([-0.985602 , 4.94795031]), array([0.48551449, 0.000999 ])) - >>> stats.permtest_1samp(rvs.T, [5.0, 0.0], axis=1) - (array([-0.985602 , 4.94795031]), array([0.51548452, 0.000999 ])) - """ - a, popmean, axis = _chk2_asarray(a, popmean, axis) - rs = check_random_state(seed) - - if a.size == 0: - return np.nan, np.nan - - # ensure popmean will broadcast to `a` correctly - if popmean.ndim != a.ndim: - popmean = np.expand_dims(popmean, axis=axis) - - # center `a` around `popmean` and calculate original mean - zeroed = a - popmean - true_mean = zeroed.mean(axis=axis) / 1 - abs_mean = np.abs(true_mean) - - # this for loop is not _the fastest_ but is memory efficient - # the broadcasting alt. would mean storing zeroed.size * n_perm in memory - permutations = np.ones(true_mean.shape) - for _ in range(n_perm): - flipped = zeroed * rs.choice([-1, 1], size=zeroed.shape) # sign flip - permutations += np.abs(flipped.mean(axis=axis)) >= abs_mean - - pvals = permutations / (n_perm + 1) # + 1 in denom accounts for true_mean - - return true_mean, pvals - - -def permtest_rel(a, b, axis=0, n_perm=1000, seed=0): - """ - Non-parametric equivalent of :py:func:`scipy.stats.ttest_rel`. - - Generates two-tailed p-value for hypothesis of whether related samples `a` - and `b` differ using permutation tests - - Parameters - ---------- - a, b : array_like - Sample observations. These arrays must have the same shape. - axis : int or None, optional - Axis along which to compute test. If None, compute over whole arrays - of `a` and `b`. Default: 0 - n_perm : int, optional - Number of permutations to assess. Unless `a` and `b` are very small - along `axis` this will approximate a randomization test via Monte - Carlo simulations. Default: 1000 - seed : {int, np.random.RandomState instance, None}, optional - Seed for random number generation. Set to None for "randomness". - Default: 0 - - Returns - ------- - stat : float or numpy.ndarray - Average difference between `a` and `b` - pvalue : float or numpy.ndarray - Non-parametric p-value - - Notes - ----- - The lowest p-value that can be returned by this function is equal to 1 / - (`n_perm` + 1). - - Examples - -------- - >>> from netneurotools import stats - - >>> np.random.seed(12345678) # set random seed for reproducible results - >>> rvs1 = np.random.normal(loc=5, scale=10, size=500) - >>> rvs2 = (np.random.normal(loc=5, scale=10, size=500) - ... + np.random.normal(scale=0.2, size=500)) - >>> stats.permtest_rel(rvs1, rvs2) # doctest: +SKIP - (-0.16506275161572695, 0.8021978021978022) - - >>> rvs3 = (np.random.normal(loc=8, scale=10, size=500) - ... 
+ np.random.normal(scale=0.2, size=500)) - >>> stats.permtest_rel(rvs1, rvs3) # doctest: +SKIP - (2.40533726097883, 0.000999000999000999) - """ - a, b, axis = _chk2_asarray(a, b, axis) - rs = check_random_state(seed) - - if a.shape[axis] != b.shape[axis]: - raise ValueError('Provided arrays do not have same length along axis') - - if a.size == 0 or b.size == 0: - return np.nan, np.nan - - # calculate original difference in means - ab = np.stack([a, b], axis=0) - if ab.ndim < 3: - ab = np.expand_dims(ab, axis=-1) - true_diff = np.squeeze(np.diff(ab, axis=0)).mean(axis=axis) / 1 - abs_true = np.abs(true_diff) - - # idx array - reidx = np.meshgrid(*[range(f) for f in ab.shape], indexing='ij') - - permutations = np.ones(true_diff.shape) - for _ in range(n_perm): - # use this to re-index (i.e., swap along) the first axis of `ab` - swap = rs.random_sample(ab.shape[:-1]).argsort(axis=axis) - reidx[0] = np.repeat(swap[..., np.newaxis], ab.shape[-1], axis=-1) - # recompute difference between `a` and `b` (i.e., first axis of `ab`) - pdiff = np.squeeze(np.diff(ab[tuple(reidx)], axis=0)).mean(axis=axis) - permutations += np.abs(pdiff) >= abs_true - - pvals = permutations / (n_perm + 1) # + 1 in denom accounts for true_diff - - return true_diff, pvals - - -def permtest_pearsonr(a, b, axis=0, n_perm=1000, resamples=None, seed=0): - """ - Non-parametric equivalent of :py:func:`scipy.stats.pearsonr`. - - Generates two-tailed p-value for hypothesis of whether samples `a` and `b` - are correlated using permutation tests - - Parameters - ---------- - a,b : (N[, M]) array_like - Sample observations. These arrays must have the same length and either - an equivalent number of columns or be broadcastable - axis : int or None, optional - Axis along which to compute test. If None, compute over whole arrays - of `a` and `b`. Default: 0 - n_perm : int, optional - Number of permutations to assess. Unless `a` and `b` are very small - along `axis` this will approximate a randomization test via Monte - Carlo simulations. Default: 1000 - resamples : (N, P) array_like, optional - Resampling array used to shuffle `a` when generating null distribution - of correlations. This array must have the same length as `a` and `b` - and should have at least the same number of columns as `n_perm` (if it - has more then only `n_perm` columns will be used. When not specified a - standard permutation is used to shuffle `a`. Default: None - seed : {int, np.random.RandomState instance, None}, optional - Seed for random number generation. Set to None for "randomness". - Default: 0 - - Returns - ------- - corr : float or numpyndarray - Correlations - pvalue : float or numpy.ndarray - Non-parametric p-value - - Notes - ----- - The lowest p-value that can be returned by this function is equal to 1 / - (`n_perm` + 1). 
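The 1 / (`n_perm` + 1) floor shared by `permtest_1samp`, `permtest_rel`, and `permtest_pearsonr` comes from counting the observed statistic as one member of the null distribution; a bare-NumPy sketch of the same convention (illustrative only):

.. code:: python

    import numpy as np

    rng = np.random.default_rng(0)
    x = rng.normal(size=100)
    y = 0.5 * x + rng.normal(size=100)

    obs = np.corrcoef(x, y)[0, 1]
    n_perm = 1000
    count = 1                                  # the observed value is included in the null
    for _ in range(n_perm):
        null = np.corrcoef(rng.permutation(x), y)[0, 1]
        count += np.abs(null) >= np.abs(obs)   # two-tailed

    pval = count / (n_perm + 1)                # can never drop below 1 / (n_perm + 1)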
- - Examples - -------- - >>> from netneurotools import datasets, stats - - >>> np.random.seed(12345678) # set random seed for reproducible results - >>> x, y = datasets.make_correlated_xy(corr=0.1, size=100) - >>> stats.permtest_pearsonr(x, y) # doctest: +SKIP - (0.10032564626876286, 0.3046953046953047) - - >>> x, y = datasets.make_correlated_xy(corr=0.5, size=100) - >>> stats.permtest_pearsonr(x, y) # doctest: +SKIP - (0.500040365781984, 0.000999000999000999) - - Also works with multiple columns by either broadcasting the smaller array - to the larger: - - >>> z = x + np.random.normal(loc=1, size=100) - >>> stats.permtest_pearsonr(x, np.column_stack([y, z])) - (array([0.50004037, 0.25843187]), array([0.000999 , 0.01098901])) - - or by using matching columns in the two arrays (e.g., `x` and `y` vs - `a` and `b`): - - >>> a, b = datasets.make_correlated_xy(corr=0.9, size=100) - >>> stats.permtest_pearsonr(np.column_stack([x, a]), np.column_stack([y, b])) - (array([0.50004037, 0.89927523]), array([0.000999, 0.000999])) - """ # noqa - a, b, axis = _chk2_asarray(a, b, axis) - rs = check_random_state(seed) - - if len(a) != len(b): - raise ValueError('Provided arrays do not have same length') - - if a.size == 0 or b.size == 0: - return np.nan, np.nan - - if resamples is not None: - if n_perm > resamples.shape[-1]: - raise ValueError('Number of permutations requested exceeds size ' - 'of resampling array.') - - # divide by one forces coercion to float if ndim = 0 - true_corr = efficient_pearsonr(a, b)[0] / 1 - abs_true = np.abs(true_corr) - - permutations = np.ones(true_corr.shape) - for perm in range(n_perm): - # permute `a` and determine whether correlations exceed original - if resamples is None: - ap = a[rs.permutation(len(a))] - else: - ap = a[resamples[:, perm]] - permutations += np.abs(efficient_pearsonr(ap, b)[0]) >= abs_true - - pvals = permutations / (n_perm + 1) # + 1 in denom accounts for true_corr - - return true_corr, pvals - - -def efficient_pearsonr(a, b, ddof=1, nan_policy='propagate'): - """ - Compute correlation of matching columns in `a` and `b`. - - Parameters - ---------- - a,b : array_like - Sample observations. These arrays must have the same length and either - an equivalent number of columns or be broadcastable - ddof : int, optional - Degrees of freedom correction in the calculation of the standard - deviation. Default: 1 - nan_policy : bool, optional - Defines how to handle when input contains nan. 'propagate' returns nan, - 'raise' throws an error, 'omit' performs the calculations ignoring nan - values. Default: 'propagate' - - Returns - ------- - corr : float or numpy.ndarray - Pearson's correlation coefficient between matching columns of inputs - pval : float or numpy.ndarray - Two-tailed p-values - - Notes - ----- - If either input contains nan and nan_policy is set to 'omit', both arrays - will be masked to omit the nan entries. 
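The column-wise computation performed by `efficient_pearsonr` (removed from the old `stats.py` here) reduces to a mean product of z-scores; a self-contained sketch of that identity:

.. code:: python

    import numpy as np
    from scipy import stats as sstats

    rng = np.random.default_rng(1)
    a = rng.normal(size=(100, 5))
    b = 0.5 * a + rng.normal(size=(100, 5))

    # Pearson r for matching columns: sum of z-score products over (n - 1)
    corr = (sstats.zscore(a, ddof=1) * sstats.zscore(b, ddof=1)).sum(axis=0) / (len(a) - 1)

    expected = np.array([sstats.pearsonr(a[:, i], b[:, i])[0] for i in range(a.shape[1])])
    assert np.allclose(corr, expected)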
- - Examples - -------- - >>> from netneurotools import datasets, stats - - Generate some not-very-correlated and some highly-correlated data: - - >>> np.random.seed(12345678) # set random seed for reproducible results - >>> x1, y1 = datasets.make_correlated_xy(corr=0.1, size=100) - >>> x2, y2 = datasets.make_correlated_xy(corr=0.8, size=100) - - Calculate both correlations simultaneously: - - >>> stats.efficient_pearsonr(np.c_[x1, x2], np.c_[y1, y2]) - (array([0.10032565, 0.79961189]), array([3.20636135e-01, 1.97429944e-23])) - """ - a, b, axis = _chk2_asarray(a, b, 0) - if len(a) != len(b): - raise ValueError('Provided arrays do not have same length') - - if a.size == 0 or b.size == 0: - return np.nan, np.nan - - if nan_policy not in ('propagate', 'raise', 'omit'): - raise ValueError(f'Value for nan_policy "{nan_policy}" not allowed') - - a, b = a.reshape(len(a), -1), b.reshape(len(b), -1) - if (a.shape[1] != b.shape[1]): - a, b = np.broadcast_arrays(a, b) - - mask = np.logical_or(np.isnan(a), np.isnan(b)) - if nan_policy == 'raise' and np.any(mask): - raise ValueError('Input cannot contain NaN when nan_policy is "omit"') - elif nan_policy == 'omit': - # avoid making copies of the data, if possible - a = np.ma.masked_array(a, mask, copy=False, fill_value=np.nan) - b = np.ma.masked_array(b, mask, copy=False, fill_value=np.nan) - - with np.errstate(invalid='ignore'): - corr = (sstats.zscore(a, ddof=ddof, nan_policy=nan_policy) - * sstats.zscore(b, ddof=ddof, nan_policy=nan_policy)) - - sumfunc, n_obs = np.sum, len(a) - if nan_policy == 'omit': - corr = corr.filled(np.nan) - sumfunc = np.nansum - n_obs = np.squeeze(np.sum(np.logical_not(np.isnan(corr)), axis=0)) - - corr = sumfunc(corr, axis=0) / (n_obs - 1) - corr = np.squeeze(np.clip(corr, -1, 1)) / 1 - - # taken from scipy.stats - ab = (n_obs / 2) - 1 - prob = 2 * special.btdtr(ab, ab, 0.5 * (1 - np.abs(corr))) - - return corr, prob - - -def _gen_rotation(seed=None): - """ - Generate random matrix for rotating spherical coordinates. - - Parameters - ---------- - seed : {int, np.random.RandomState instance, None}, optional - Seed for random number generation - - Returns - ------- - rotate_{l,r} : (3, 3) numpy.ndarray - Rotations for left and right hemisphere coordinates, respectively - """ - rs = check_random_state(seed) - - # for reflecting across Y-Z plane - reflect = np.array([[-1, 0, 0], [0, 1, 0], [0, 0, 1]]) - - # generate rotation for left - rotate_l, temp = np.linalg.qr(rs.normal(size=(3, 3))) - rotate_l = rotate_l @ np.diag(np.sign(np.diag(temp))) - if np.linalg.det(rotate_l) < 0: - rotate_l[:, 0] = -rotate_l[:, 0] - - # reflect the left rotation across Y-Z plane - rotate_r = reflect @ rotate_l @ reflect - - return rotate_l, rotate_r - - -def gen_spinsamples(coords, hemiid, n_rotate=1000, check_duplicates=True, - method='original', exact=False, seed=None, verbose=False, - return_cost=False): - """ - Return a resampling array for `coords` obtained from rotations / spins. - - Using the method initially proposed in [ST1]_ (and later modified + updated - based on findings in [ST2]_ and [ST3]_), this function applies random - rotations to the user-supplied `coords` in order to generate a resampling - array that preserves its spatial embedding. Rotations are generated for one - hemisphere and mirrored for the other (see `hemiid` for more information). 
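The core of the `method='original'` spin described above is a random proper rotation (generated via QR, as in `_gen_rotation`) followed by nearest-neighbour reassignment; a compact sketch using only NumPy/SciPy:

.. code:: python

    import numpy as np
    from scipy import spatial

    rng = np.random.default_rng(1234)

    # toy coordinates for a single hemisphere, projected onto the unit sphere
    coords = rng.normal(size=(20, 3))
    coords /= np.linalg.norm(coords, axis=1, keepdims=True)

    # uniform random rotation via QR decomposition
    q, r = np.linalg.qr(rng.normal(size=(3, 3)))
    q = q @ np.diag(np.sign(np.diag(r)))
    if np.linalg.det(q) < 0:
        q[:, 0] = -q[:, 0]                    # force det(q) = +1 (proper rotation)

    # "original" method: each node adopts the nearest rotated node's index
    dist, resampled = spatial.cKDTree(coords @ q).query(coords, 1)
    # `resampled` is one column of the spin-based resampling array; duplicate
    # indices are possible, which is what method='vasa'/'hungarian' avoid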
- - Due to irregular sampling of `coords` and the randomness of the rotations - it is possible that some "rotations" may resample with replacement (i.e., - will not be a true permutation). The likelihood of this can be reduced by - either increasing the sampling density of `coords` or changing the - ``method`` parameter (see Notes for more information on the latter). - - Parameters - ---------- - coords : (N, 3) array_like - X, Y, Z coordinates of `N` nodes/parcels/regions/vertices defined on a - sphere - hemiid : (N,) array_like - Array denoting hemisphere designation of coordinates in `coords`, where - values should be {0, 1} denoting the different hemispheres. Rotations - are generated for one hemisphere and mirrored across the y-axis for the - other hemisphere. - n_rotate : int, optional - Number of rotations to generate. Default: 1000 - check_duplicates : bool, optional - Whether to check for and attempt to avoid duplicate resamplings. A - warnings will be raised if duplicates cannot be avoided. Setting to - True may increase the runtime of this function! Default: True - method : {'original', 'vasa', 'hungarian'}, optional - Method by which to match non- and rotated coordinates. Specifying - 'original' will use the method described in [ST1]_. Specfying 'vasa' - will use the method described in [ST4]_. Specfying 'hungarian' will use - the Hungarian algorithm to minimize the global cost of reassignment - (will dramatically increase runtime). Default: 'original' - seed : {int, np.random.RandomState instance, None}, optional - Seed for random number generation. Default: None - verbose : bool, optional - Whether to print occasional status messages. Default: False - return_cost : bool, optional - Whether to return cost array (specified as Euclidean distance) for each - coordinate for each rotation Default: True - - Returns - ------- - spinsamples : (N, `n_rotate`) numpy.ndarray - Resampling matrix to use in permuting data based on supplied `coords`. - cost : (N, `n_rotate`,) numpy.ndarray - Cost (specified as Euclidean distance) of re-assigning each coordinate - for every rotation in `spinsamples`. Only provided if `return_cost` is - True. - - Notes - ----- - By default, this function uses the minimum Euclidean distance between the - original coordinates and the new, rotated coordinates to generate a - resampling array after each spin. Unfortunately, this can (with some - frequency) lead to multiple coordinates being re-assigned the same value: - - >>> from netneurotools import stats as nnstats - >>> coords = [[0, 0, 1], [1, 0, 0], [0, 0, 1], [1, 0, 0]] - >>> hemi = [0, 0, 1, 1] - >>> nnstats.gen_spinsamples(coords, hemi, n_rotate=1, seed=1, - ... method='original', check_duplicates=False) - array([[0], - [0], - [2], - [3]]) - - While this is reasonable in most circumstances, if you feel incredibly - strongly about having a perfect "permutation" (i.e., all indices appear - once and exactly once in the resampling), you can set the ``method`` - parameter to either 'vasa' or 'hungarian': - - >>> nnstats.gen_spinsamples(coords, hemi, n_rotate=1, seed=1, - ... method='vasa', check_duplicates=False) - array([[1], - [0], - [2], - [3]]) - >>> nnstats.gen_spinsamples(coords, hemi, n_rotate=1, seed=1, - ... method='hungarian', check_duplicates=False) - array([[0], - [1], - [2], - [3]]) - - Note that setting this parameter may increase the runtime of the function - (especially for `method='hungarian'`). 
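In practice the resampling array returned by `gen_spinsamples` is what gets passed to the `resamples` argument of `permtest_pearsonr` to obtain a spatially constrained p-value; a toy end-to-end sketch, assuming the reorganized `netneurotools.stats` namespace continues to expose both functions:

.. code:: python

    import numpy as np
    from netneurotools import stats as nnstats

    rng = np.random.default_rng(1234)

    # 40 toy "parcels" on the unit sphere, split evenly across hemispheres
    coords = rng.normal(size=(40, 3))
    coords /= np.linalg.norm(coords, axis=1, keepdims=True)
    hemiid = np.repeat([0, 1], 20)

    x, y = rng.normal(size=(2, 40))            # two brain maps to correlate

    spins = nnstats.gen_spinsamples(coords, hemiid, n_rotate=100, seed=1234)
    r, p_spin = nnstats.permtest_pearsonr(x, y, n_perm=100,
                                          resamples=spins, seed=1234)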
Refer to [ST1]_ for information on - why the default (i.e., ``exact`` set to False) suffices in most cases. - - For the original MATLAB implementation of this function refer to [ST5]_. - - References - ---------- - .. [ST1] Alexander-Bloch, A., Shou, H., Liu, S., Satterthwaite, T. D., - Glahn, D. C., Shinohara, R. T., Vandekar, S. N., & Raznahan, A. (2018). - On testing for spatial correspondence between maps of human brain - structure and function. NeuroImage, 178, 540-51. - - .. [ST2] Blaser, R., & Fryzlewicz, P. (2016). Random Rotation Ensembles. - Journal of Machine Learning Research, 17(4), 1–26. - - .. [ST3] Lefèvre, J., Pepe, A., Muscato, J., De Guio, F., Girard, N., - Auzias, G., & Germanaud, D. (2018). SPANOL (SPectral ANalysis of Lobes): - A Spectral Clustering Framework for Individual and Group Parcellation of - Cortical Surfaces in Lobes. Frontiers in Neuroscience, 12, 354. - - .. [ST4] Váša, F., Seidlitz, J., Romero-Garcia, R., Whitaker, K. J., - Rosenthal, G., Vértes, P. E., ... & Jones, P. B. (2018). Adolescent - tuning of association cortex in human structural brain networks. - Cerebral Cortex, 28(1), 281-294. - - .. [ST5] https://github.com/spin-test/spin-test - """ - methods = ['original', 'vasa', 'hungarian'] - if method not in methods: - raise ValueError('Provided method "{}" invalid. Must be one of {}.' - .format(method, methods)) - - if exact: - warnings.warn('The `exact` parameter will no longer be supported in ' - 'an upcoming release. Please use the `method` parameter ' - 'instead.', DeprecationWarning, stacklevel=3) - if exact == 'vasa' and method == 'original': - method = 'vasa' - elif exact and method == 'original': - method = 'hungarian' - - seed = check_random_state(seed) - - coords = np.asanyarray(coords) - hemiid = np.squeeze(np.asanyarray(hemiid, dtype='int8')) - - # check supplied coordinate shape - if coords.shape[-1] != 3 or coords.squeeze().ndim != 2: - raise ValueError('Provided `coords` must be of shape (N, 3), not {}' - .format(coords.shape)) - - # ensure hemisphere designation array is correct - if hemiid.ndim != 1: - raise ValueError('Provided `hemiid` array must be one-dimensional.') - if len(coords) != len(hemiid): - raise ValueError('Provided `coords` and `hemiid` must have the same ' - 'length. Provided lengths: coords = {}, hemiid = {}' - .format(len(coords), len(hemiid))) - if np.max(hemiid) > 1 or np.min(hemiid) < 0: - raise ValueError('Hemiid must have values in {0, 1} denoting left and ' - 'right hemisphere coordinates, respectively. ' - + 'Provided array contains values: {}' - .format(np.unique(hemiid))) - - # empty array to store resampling indices - spinsamples = np.zeros((len(coords), n_rotate), dtype=int) - cost = np.zeros((len(coords), n_rotate)) - inds = np.arange(len(coords), dtype=int) - - # generate rotations and resampling array! 
- msg, warned = '', False - for n in range(n_rotate): - count, duplicated = 0, True - - if verbose: - msg = 'Generating spin {:>5} of {:>5}'.format(n, n_rotate) - print(msg, end='\r', flush=True) - - while duplicated and count < 500: - count, duplicated = count + 1, False - resampled = np.zeros(len(coords), dtype='int32') - - # rotate each hemisphere separately - for h, rot in enumerate(_gen_rotation(seed=seed)): - hinds = (hemiid == h) - coor = coords[hinds] - if len(coor) == 0: - continue - - # if we need an "exact" mapping (i.e., each node needs to be - # assigned EXACTLY once) then we have to calculate the full - # distance matrix which is a nightmare with respect to memory - # for anything that isn't parcellated data. - # that is, don't do this with vertex coordinates! - if method == 'vasa': - dist = spatial.distance_matrix(coor, coor @ rot) - # min of max a la Vasa et al., 2018 - col = np.zeros(len(coor), dtype='int32') - for _ in range(len(dist)): - # find parcel whose closest neighbor is farthest away - # overall; assign to that - row = dist.min(axis=1).argmax() - col[row] = dist[row].argmin() - cost[inds[hinds][row], n] = dist[row, col[row]] - # set to -inf and inf so they can't be assigned again - dist[row] = -np.inf - dist[:, col[row]] = np.inf - # optimization of total cost using Hungarian algorithm. this - # may result in certain parcels having higher cost than with - # `method='vasa'` but should always result in the total cost - # being lower #tradeoffs - elif method == 'hungarian': - dist = spatial.distance_matrix(coor, coor @ rot) - row, col = optimize.linear_sum_assignment(dist) - cost[hinds, n] = dist[row, col] - # if nodes can be assigned multiple targets, we can simply use - # the absolute minimum of the distances (no optimization - # required) which is _much_ lighter on memory - # huge thanks to https://stackoverflow.com/a/47779290 for this - # memory-efficient method - elif method == 'original': - dist, col = spatial.cKDTree(coor @ rot).query(coor, 1) - cost[hinds, n] = dist - - resampled[hinds] = inds[hinds][col] - - # if we want to check for duplicates ensure that we don't have any - if check_duplicates: - if np.any(np.all(resampled[:, None] == spinsamples[:, :n], 0)): - duplicated = True - # if our "spin" is identical to the input then that's no good - elif np.all(resampled == inds): - duplicated = True - - # if we broke out because we tried 500 rotations and couldn't generate - # a new one, warn that we're using duplicate rotations and give up. - # this should only be triggered if check_duplicates is set to True - if count == 500 and not warned: - warnings.warn( - 'Duplicate rotations used. Check resampling array ' - 'to determine real number of unique permutations.', stacklevel=2) - warned = True - - spinsamples[:, n] = resampled - - if verbose: - print(' ' * len(msg) + '\b' * len(msg), end='', flush=True) - - if return_cost: - return spinsamples, cost - - return spinsamples - - -def get_dominance_stats(X, y, use_adjusted_r_sq=True, verbose=False, n_jobs=1): - """ - Return the dominance analysis statistics for multilinear regression. - - This is a rewritten & simplified version of [DA1]_. It is briefly - tested against the original package, but still in early stages. - Please feel free to report any bugs. - - Warning: Still work-in-progress. Parameters might change! - - Parameters - ---------- - X : (N, M) array_like - Input data - y : (N,) array_like - Target values - use_adjusted_r_sq : bool, optional - Whether to use adjusted r squares. 
Default: True - verbose : bool, optional - Whether to print debug messages. Default: False - n_jobs : int, optional - The number of jobs to run in parallel. Default: 1 - - Returns - ------- - model_metrics : dict - The dominance metrics, currently containing `individual_dominance`, - `partial_dominance`, `total_dominance`, and `full_r_sq`. - model_r_sq : dict - Contains all model r squares - - Notes - ----- - Example usage - - .. code:: python - - from netneurotools.stats import get_dominance_stats - from sklearn.datasets import load_boston - X, y = load_boston(return_X_y=True) - model_metrics, model_r_sq = get_dominance_stats(X, y) - - To compare with [DA1]_, use `use_adjusted_r_sq=False` - - .. code:: python - - from dominance_analysis import Dominance_Datasets - from dominance_analysis import Dominance - boston_dataset=Dominance_Datasets.get_boston() - dominance_regression=Dominance(data=boston_dataset, - target='House_Price',objective=1) - incr_variable_rsquare=dominance_regression.incremental_rsquare() - dominance_regression.dominance_stats() - - References - ---------- - .. [DA1] https://github.com/dominance-analysis/dominance-analysis - - """ - # this helps to remove one element from a tuple - def remove_ret(tpl, elem): - lst = list(tpl) - lst.remove(elem) - return tuple(lst) - - # sklearn linear regression wrapper - def get_reg_r_sq(X, y, use_adjusted_r_sq=True): - lin_reg = LinearRegression() - lin_reg.fit(X, y) - yhat = lin_reg.predict(X) - SS_Residual = sum((y - yhat) ** 2) - SS_Total = sum((y - np.mean(y)) ** 2) - r_squared = 1 - (float(SS_Residual)) / SS_Total - adjusted_r_squared = 1 - (1 - r_squared) * \ - (len(y) - 1) / (len(y) - X.shape[1] - 1) - if use_adjusted_r_sq: - return adjusted_r_squared - else: - return r_squared - - # helper function to compute r_sq for a given idx_tuple - def compute_r_sq(idx_tuple): - return idx_tuple, get_reg_r_sq(X[:, idx_tuple], - y, - use_adjusted_r_sq=use_adjusted_r_sq) - - # generate all predictor combinations in list (num of predictors) of lists - n_predictor = X.shape[-1] - # n_comb_len_group = n_predictor - 1 - predictor_combs = [list(combinations(range(n_predictor), i)) - for i in range(1, n_predictor + 1)] - if verbose: - print(f"[Dominance analysis] Generated \ - {len([v for i in predictor_combs for v in i])} combinations") - - model_r_sq = dict() - results = Parallel(n_jobs=n_jobs)( - delayed(compute_r_sq)(idx_tuple) - for len_group in tqdm(predictor_combs, - desc='num-of-predictor loop', - disable=not verbose) - for idx_tuple in tqdm(len_group, - desc='insider loop', - disable=not verbose)) - - # extract r_sq from results - for idx_tuple, r_sq in results: - model_r_sq[idx_tuple] = r_sq - - if verbose: - print(f"[Dominance analysis] Acquired {len(model_r_sq)} r^2's") - - # getting all model metrics - model_metrics = dict([]) - - # individual dominance - individual_dominance = [] - for i_pred in range(n_predictor): - individual_dominance.append(model_r_sq[(i_pred,)]) - individual_dominance = np.array(individual_dominance).reshape(1, -1) - model_metrics["individual_dominance"] = individual_dominance - - # partial dominance - partial_dominance = [[] for _ in range(n_predictor - 1)] - for i_len in range(n_predictor - 1): - i_len_combs = list(combinations(range(n_predictor), i_len + 2)) - for j_node in range(n_predictor): - j_node_sel = [v for v in i_len_combs if j_node in v] - reduced_list = [remove_ret(comb, j_node) for comb in j_node_sel] - diff_values = [ - model_r_sq[j_node_sel[i]] - model_r_sq[reduced_list[i]] - for i in 
range(len(reduced_list))] - partial_dominance[i_len].append(np.mean(diff_values)) - - # save partial dominance - partial_dominance = np.array(partial_dominance) - model_metrics["partial_dominance"] = partial_dominance - # get total dominance - total_dominance = np.mean( - np.r_[individual_dominance, partial_dominance], axis=0) - # test and save total dominance - assert np.allclose(total_dominance.sum(), - model_r_sq[tuple(range(n_predictor))]), \ - "Sum of total dominance is not equal to full r square!" - model_metrics["total_dominance"] = total_dominance - # save full r^2 - model_metrics["full_r_sq"] = model_r_sq[tuple(range(n_predictor))] - - return model_metrics, model_r_sq - - -def network_pearsonr(annot1, annot2, weight): - r""" - Calculate pearson correlation between two annotation vectors. - - .. warning:: - Test before use. - - Parameters - ---------- - annot1 : (N,) array_like - First annotation vector, demean will be applied. - annot2 : (N,) array_like - Second annotation vector, demean will be applied. - weight : (N, N) array_like - Weight matrix. Diagonal elements should be 1. - - Returns - ------- - corr : float - Network correlation between `annot1` and `annot2` - - Notes - ----- - If Pearson correlation is represented as - - .. math:: - \rho_{x,y} = \dfrac{ - \mathrm{sum}(I \times (\hat{x} \otimes \hat{y})) - }{ - \sigma_x \sigma_y - } - - The network correlation is defined analogously as - - .. math:: - \rho_{x,y,G} = \dfrac{ - \mathrm{sum}(W \times (\hat{x} \otimes \hat{y})) - }{ - \sigma_{x,W} \sigma_{y,W} - } - - where :math:`\hat{x}` and :math:`\hat{y}` are the demeaned annotation vectors, - - The weight matrix :math:`W` is used to represent the network structure. - It is usually in the form of :math:`W = \\exp(-kL)` where :math:`L` is the - length matrix and :math:`k` is a decay parameter. - - Example using shortest path length as weight - - .. code:: python - - spl, _ = distance_wei_floyd(D) # input should be distance matrix - spl_wei = 1 / np.exp(spl) - netcorr = network_pearsonr(annot1, annot2, spl_wei) - - Example using (inverse) effective resistance as weight - - .. code:: python - - R_eff = effective_resistance(W) - R_eff_norm = R_eff / np.max(R_eff) - W = 1 / R_eff_norm - W = W / np.max(W) - np.fill_diagonal(W, 1.0) - netcorr = network_pearsonr(annot1, annot2, W) - - References - ---------- - .. [1] Coscia, M. (2021). Pearson correlations on complex networks. - Journal of Complex Networks, 9(6), cnab036. - https://doi.org/10.1093/comnet/cnab036 - - - See Also - -------- - netneurotools.stats.network_pearsonr_pairwise - """ - annot1 = annot1 - np.mean(annot1) - annot2 = annot2 - np.mean(annot2) - upper = np.sum(np.multiply(weight, np.outer(annot1, annot2))) - lower1 = np.sum(np.multiply(weight, np.outer(annot1, annot1))) - lower2 = np.sum(np.multiply(weight, np.outer(annot2, annot2))) - return upper / np.sqrt(lower1) / np.sqrt(lower2) - - -def network_pearsonr_numba(annot1, annot2, weight): - """ - Numba version of :meth:`netneurotools.stats.network_pearsonr`. - - .. warning:: - Test before use. - - Parameters - ---------- - annot1 : (N,) array_like - First annotation vector, demean will be applied. - annot2 : (N,) array_like - Second annotation vector, demean will be applied. - weight : (N, N) array_like - Weight matrix. Diagonal elements should be 1. 
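    As an illustrative aside, with an identity weight matrix the network
    correlation reduces to the ordinary Pearson correlation (see the Notes of
    :meth:`netneurotools.stats.network_pearsonr` above), which gives a quick
    way to check the compiled variant against the pure-NumPy one. The import
    assumes both functions remain exposed from ``netneurotools.stats``; all
    other names are for demonstration only.

    .. code:: python

        import numpy as np
        from netneurotools.stats import network_pearsonr, network_pearsonr_numba

        rng = np.random.default_rng(1234)
        x, y = rng.standard_normal((2, 100))
        W = np.eye(100)  # identity weights recover ordinary Pearson correlation
        r_plain = network_pearsonr(x, y, W)
        r_numba = network_pearsonr_numba(x, y, W)
        assert np.isclose(r_plain, r_numba)
        assert np.isclose(r_plain, np.corrcoef(x, y)[0, 1])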
- - Returns - ------- - corr : float - Network correlation between `annot1` and `annot2` - """ - n = annot1.shape[0] - annot1 = annot1 - np.mean(annot1) - annot2 = annot2 - np.mean(annot2) - upper, lower1, lower2 = 0.0, 0.0, 0.0 - for i in range(n): - for j in range(n): - upper += annot1[i] * annot2[j] * weight[i, j] - lower1 += annot1[i] * annot1[j] * weight[i, j] - lower2 += annot2[i] * annot2[j] * weight[i, j] - return upper / np.sqrt(lower1) / np.sqrt(lower2) - - -if use_numba: - network_pearsonr_numba = njit(network_pearsonr_numba) - - -def _cross_outer(annot_mat): - """ - Calculate cross outer product of input matrix. - - This functions is only used in `network_pearsonr_pairwise`. - - Parameters - ---------- - annot_mat : (N, D) array_like - Input matrix - - Returns - ------- - cross_outer : (N, N, D, D) numpy.ndarray - Cross outer product of `annot_mat` - """ - n_samp, n_feat = annot_mat.shape - cross_outer = np.empty((n_samp, n_samp, n_feat, n_feat), annot_mat.dtype) - for a in range(n_samp): - for b in range(n_samp): - for c in range(n_feat): - for d in range(n_feat): - cross_outer[a, b, c, d] = annot_mat[a, c] * annot_mat[b, d] - return cross_outer - - -if use_numba: - # ("float64[:,:,:,::1](float64[:,::1])") - _cross_outer = njit(_cross_outer) - - -def _multiply_sum(cross_outer, weight): - """ - Multiply and sum cross outer product. - - This functions is only used in `network_pearsonr_pairwise`. - - Parameters - ---------- - cross_outer : (N, N, D, D) array_like - Cross outer product of `annot_mat` - weight : (D, D) array_like - Weight matrix - - Returns - ------- - cross_outer_after : (N, N) numpy.ndarray - Result of multiplying and summing `cross_outer` - """ - n_samp, _, n_dim, _ = cross_outer.shape - cross_outer_after = np.empty((n_samp, n_samp), cross_outer.dtype) - for i in range(n_samp): - for j in range(n_samp): - curr_sum = 0.0 - for k in range(n_dim): - for l in range(n_dim): # noqa: E741 - curr_sum += weight[k, l] * cross_outer[i, j, k, l] - cross_outer_after[i, j] = curr_sum - return cross_outer_after - - -if use_numba: - # ("float64[:,::1](float64[:,:,:,::1],float64[:,::1])") - _multiply_sum = njit(_multiply_sum) - - -def network_pearsonr_pairwise(annot_mat, weight): - """ - Calculate pairwise network correlation between rows of `annot_mat`. - - .. warning:: - Test before use. - - Parameters - ---------- - annot_mat : (N, D) array_like - Input matrix - weight : (D, D) array_like - Weight matrix. Diagonal elements should be 1. - - Returns - ------- - corr_mat : (N, N) numpy.ndarray - Pairwise network correlation matrix - - Notes - ----- - This is a faster version of :meth:`netneurotools.stats.network_pearsonr` - for calculating pairwise network correlation between rows of `annot_mat`. - Check :meth:`netneurotools.stats.network_pearsonr` for details. 
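    A hypothetical usage sketch (the data, decay parameter, and variable names
    are illustrative; the import assumes the function is exposed from
    ``netneurotools.stats``):

    .. code:: python

        import numpy as np
        from netneurotools.stats import network_pearsonr_pairwise

        rng = np.random.default_rng(1234)
        annot_mat = rng.standard_normal((5, 50))  # 5 annotation maps over 50 nodes
        length = rng.uniform(1, 10, size=(50, 50))
        length = (length + length.T) / 2          # symmetric length matrix
        W = np.exp(-0.5 * length)                 # W = exp(-k L), illustrative k = 0.5
        np.fill_diagonal(W, 1.0)
        corr_mat = network_pearsonr_pairwise(annot_mat, W)  # (5, 5) matrix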
- - See Also - -------- - netneurotools.stats.network_pearsonr - """ - annot_mat_demean = annot_mat - np.mean(annot_mat, axis=1, keepdims=True) - if use_numba: - cross_outer = _cross_outer(annot_mat_demean) - cross_outer_after = _multiply_sum(cross_outer, weight) - else: - # https://stackoverflow.com/questions/24839481/python-matrix-outer-product - cross_outer = np.einsum('ac,bd->abcd', annot_mat_demean, annot_mat_demean) - cross_outer_after = np.sum(np.multiply(cross_outer, weight), axis=(2, 3)) - # translating the two lines below in numba does not speed up much - lower = np.sqrt(np.diagonal(cross_outer_after)) - return cross_outer_after / np.einsum('i,j', lower, lower) - - -def _onehot_quadratic_form_broadcast(Q_star): - """ - Calculate one-hot quadratic form of input matrix. - - This functions is only used in `effective_resistance`. - - Parameters - ---------- - Q_star : (N, N) array_like - Input matrix - - Returns - ------- - R_eff : (N, N) numpy.ndarray - One-hot quadratic form of `Q_star` - """ - n = Q_star.shape[0] - R_eff = np.empty((n, n), Q_star.dtype) - for i in range(n): - for j in range(n): - R_eff[i, j] = Q_star[i, i] - Q_star[j, i] - Q_star[i, j] + Q_star[j, j] - return R_eff - - -if use_numba: - # ("float64[:,::1](float64[:,::1])") - _onehot_quadratic_form_broadcast = njit(_onehot_quadratic_form_broadcast) - - -def effective_resistance(W, directed=True): - """ - Calculate effective resistance matrix. - - The effective resistance between two nodes in a graph, often used in the context - of electrical networks, is a measure that stems from the inverse of the Laplacian - matrix of the graph. - - .. warning:: - Test before use. - - Parameters - ---------- - W : (N, N) array_like - Weight matrix. - directed : bool, optional - Whether the graph is directed. This is used to determine whether to turn on - the :code:`hermitian=True` option in :func:`numpy.linalg.pinv`. When you are - using a symmetric weight matrix (while real-valued implying hermitian), you - can set this to False for better performance. Default: True - - Returns - ------- - R_eff : (N, N) numpy.ndarray - Effective resistance matrix - - Notes - ----- - The effective resistance between two nodes :math:`i` and :math:`j` is defined as - - .. math:: - R_{ij} = (e_i - e_j)^T Q^* (e_i - e_j) - - where :math:`Q^*` is the Moore-Penrose pseudoinverse of the Laplacian matrix - :math:`L` of the graph, and :math:`e_i` is the :math:`i`-th standard basis vector. - - References - ---------- - .. [1] Ellens, W., Spieksma, F. M., Van Mieghem, P., Jamakovic, A., & Kooij, - R. E. (2011). Effective graph resistance. Linear Algebra and Its Applications, - 435(10), 2491–2506. https://doi.org/10.1016/j.laa.2011.02.024 - - See Also - -------- - netneurotools.stats.network_polarisation - """ - L = _graph_laplacian(W) - Q_star = np.linalg.pinv(L, hermitian=not directed) - if use_numba: - R_eff = _onehot_quadratic_form_broadcast(Q_star) - else: - Q_star_diag = np.diag(Q_star) - R_eff = \ - Q_star_diag[:, np.newaxis] \ - - Q_star \ - - Q_star.T \ - + Q_star_diag[np.newaxis, :] - return R_eff - - -def _polariz_diff(vec): - """ - Calculate difference between positive and negative parts of a vector. - - This functions is only used in `network_polarisation`. - - Parameters - ---------- - vec : (N,) array_like - Input vector. Must have both positive and negative values. 
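    For intuition, a small worked example of the rescaling this helper applies
    (the values are arbitrary):

    .. code:: python

        import numpy as np

        vec = np.array([2.0, -1.0, 0.5, -4.0])
        pos = np.maximum(vec, 0.0)
        pos /= np.max(pos)                  # [1.  , 0.  , 0.25, 0.  ]
        neg = np.abs(np.minimum(vec, 0.0))
        neg /= np.max(neg)                  # [0.  , 0.25, 0.  , 1.  ]
        diff = pos - neg                    # [1.  , -0.25, 0.25, -1.  ]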
- - Returns - ------- - vec_diff : (N,) numpy.ndarray - Difference between positive and negative parts of `vec` - """ - # - vec_pos = np.maximum(vec, 0.0) - vec_pos /= np.max(vec_pos) - # - vec_neg = np.minimum(vec, 0.0) - vec_neg = np.abs(vec_neg) - vec_neg /= np.max(vec_neg) - return (vec_pos - vec_neg) - - -if use_numba: - _polariz_diff = njit(_polariz_diff) - - -def _quadratic_form(W, vec_left, vec_right, squared=False): - """ - Calculate quadratic form :math:`v_{left}^T W v_{right}`. - - Parameters - ---------- - W : (N, N) array_like - Input matrix. - vec_left : (N,) array_like - Left weight vector. - vec_right : (N,) array_like - Right weight vector. - squared : bool, optional - Whether to square the input weight matrix. Default: False - - Returns - ------- - quadratic_form : float - Quadratic form from `W`, `vec_left`, and `vec_right` - """ - # [numpy] - - # (vec_left.T @ W @ vec_right)[0, 0] - # [numba] - # vec = np.ascontiguousarray(vec[np.newaxis, :]) - n = W.shape[0] - ret = 0.0 - for i in range(n): - for j in range(n): - if squared: - ret += vec_left[i] * vec_right[j] * W[i, j]**2 - else: - ret += vec_left[i] * vec_right[j] * W[i, j] - return ret - - -if use_numba: - _quadratic_form = njit(_quadratic_form) - - -def network_polarisation(vec, W, directed=True): - r""" - Calculate polarisation of a vector on a graph. - - Network polarisation is a measure of polizzartion taken into account all the - three factors below [1]_: - - - how extreme the opinions of the people are - - how much they organize into echo chambers, and - - how these echo chambers organize in the network - - .. warning:: - Test before use. - - Parameters - ---------- - vec : (N,) array_like - Polarization vector. Must have both positive and negative values. Will be - normalized between -1 and 1 internally. - W : (N, N) array_like - Weight matrix. - directed : bool, optional - Whether the graph is directed. This is used to determine whether to turn on - the :code:`hermitian=True` option in :func:`numpy.linalg.pinv`. When you are - using a symmetric weight matrix (while real-valued implying hermitian), you - can set this to False for better performance. Default: True - - Returns - ------- - polariz : float - Polarization of `vec` on `W` - - Notes - ----- - The measure is based on the genralized Eucledian distance, defined as - - .. math:: - \delta_{G, o} = \sqrt{(o^+ - o^-)^T Q^* (o^+ - o^-)} - - where :math:`o^+` and :math:`o^-` are the positive and negative parts of the - polarization vector, and :math:`Q^*` is the Moore-Penrose pseudoinverse - of the Laplacian matrix :math:`L` of the graph. Check :func:`effective_resistance` - for similarity. - - References - ---------- - .. [1] Hohmann, M., Devriendt, K., & Coscia, M. (2023). Quantifying ideological - polarization on a network using generalized Euclidean distance. Science Advances, - 9(9), eabq2044. https://doi.org/10.1126/sciadv.abq2044 - - See Also - -------- - netneurotools.stats.effective_resistance - """ - L = _graph_laplacian(W) - Q_star = np.linalg.pinv(L, hermitian=not directed) - diff = _polariz_diff(vec) - if use_numba: - polariz_sq = _quadratic_form(Q_star, diff, diff, squared=False) - else: - polariz_sq = (diff.T @ Q_star @ diff) - return np.sqrt(polariz_sq) - - -def network_variance(vec, D): - r""" - Calculate variance of a vector on a graph. - - Network variance is a measure of variance taken into account the network - structure. - - .. warning:: - Test before use. - - Parameters - ---------- - vec : (N,) array_like - Input vector. 
Must be all positive. - Will be normalized internally as a probability distribution. - D : (N, N) array_like - Distance matrix. - - Returns - ------- - network_variance : float - Network variance of `vec` on `D` - - Notes - ----- - The network variance is defined as - - .. math:: - var(p) = \frac{1}{2} \sum_{i, j} p(i) p(j) d^2(i,j) - - where :math:`p` is the probability distribution of `vec`, and :math:`d(i,j)` - is the distance between node :math:`i` and :math:`j`. - - The distance matrix :math:`D` can make use of effective resistance or its - square root. - - Example using effective resistance as weight matrix - - .. code:: python - - R_eff = effective_resistance(W) - netvar = network_variance(vec, R_eff) - - References - ---------- - .. [1] Devriendt, K., Martin-Gutierrez, S., & Lambiotte, R. (2022). - Variance and covariance of distributions on graphs. SIAM Review, 64(2), - 343–359. https://doi.org/10.1137/20M1361328 - - See Also - -------- - netneurotools.stats.network_covariance - """ - p = vec / np.sum(vec) - return 0.5 * (p.T @ np.multiply(D, D) @ p) - - -def network_variance_numba(vec, D): - """ - Numba version of :meth:`netneurotools.stats.network_variance`. - - Network variance is a measure of variance taken into account the network - structure. - - .. warning:: - Test before use. - - Parameters - ---------- - vec : (N,) array_like - Input vector. Must be all positive. - Will be normalized internally as a probability distribution. - D : (N, N) array_like - Distance matrix. - - Returns - ------- - network_variance : float - Network variance of `vec` on `D` - """ - p = vec / np.sum(vec) - return 0.5 * _quadratic_form(D, p, p, squared=True) - - -if use_numba: - network_variance_numba = njit(network_variance_numba) - - -def network_covariance(joint_pmat, D, calc_marginal=True): - r""" - Calculate covariance of a joint probability matrix on a graph. - - .. warning:: - Test before use. - - Parameters - ---------- - joint_pmat : (N, N) array_like - Joint probability matrix. Please make sure that it is valid. - D : (N, N) array_like - Distance matrix. - calc_marginal : bool, optional - Whether to calculate marginal variance. It will be marginally faster if - :code:`calc_marginal=False` (returning marginal variances as 0). Default: True - - Returns - ------- - network_covariance : float - Covariance of `joint_pmat` on `D` - var_p : float - Marginal variance of `joint_pmat` on `D`. - Will be 0 if :code:`calc_marginal=False` - var_q : float - Marginal variance of `joint_pmat` on `D`. - Will be 0 if :code:`calc_marginal=False` - - Notes - ----- - The network variance is defined as - - .. math:: - cov(P) = \frac{1}{2} \sum_{i, j} [p(i) q(j) - P(i,j)] d^2(i,j) - - where :math:`P` is the joint probability matrix, :math:`p` and :math:`q` - are the marginal probability distributions of `joint_pmat`, and :math:`d(i,j)` - is the distance between node :math:`i` and :math:`j`. - - Check :func:`network_variance` for usage. - - References - ---------- - .. [1] Devriendt, K., Martin-Gutierrez, S., & Lambiotte, R. (2022). - Variance and covariance of distributions on graphs. SIAM Review, 64(2), - 343–359. 
https://doi.org/10.1137/20M1361328 - - See Also - -------- - netneurotools.stats.network_variance - """ - p = np.sum(joint_pmat, axis=1) - q = np.sum(joint_pmat, axis=0) - D_sq = np.multiply(D, D) - cov = p.T @ D_sq @ q - np.sum(np.multiply(joint_pmat, D_sq)) - if calc_marginal: - var_p = p.T @ D_sq @ p - var_q = q.T @ D_sq @ q - else: - var_p, var_q = 0, 0 - return 0.5 * cov, 0.5 * var_p, 0.5 * var_q - - -def network_covariance_numba(joint_pmat, D, calc_marginal=True): - """ - Numba version of :meth:`netneurotools.stats.network_covariance`. - - .. warning:: - Test before use. - - Parameters - ---------- - joint_pmat : (N, N) array_like - Joint probability matrix. Please make sure that it is valid. - D : (N, N) array_like - Distance matrix. - calc_marginal : bool, optional - Whether to calculate marginal variance. It will be marginally faster if - :code:`calc_marginal=False` (returning marginal variances as 0). Default: True - - Returns - ------- - network_covariance : float - Covariance of `joint_pmat` on `D` - var_p : float - Marginal variance of `joint_pmat` on `D`. - Will be 0 if :code:`calc_marginal=False` - var_q : float - Marginal variance of `joint_pmat` on `D`. - Will be 0 if :code:`calc_marginal=False` - """ - n = joint_pmat.shape[0] - p = np.sum(joint_pmat, axis=1) - q = np.sum(joint_pmat, axis=0) - cov = 0.0 - var_p, var_q = 0.0, 0.0 - for i in range(n): - for j in range(n): - cov += (p[i] * q[j] - joint_pmat[i, j]) * D[i, j]**2 - if calc_marginal: - var_p += p[i] * p[j] * D[i, j]**2 - var_q += q[i] * q[j] * D[i, j]**2 - return 0.5 * cov, 0.5 * var_p, 0.5 * var_q - - -if use_numba: - network_covariance_numba = njit(network_covariance_numba) diff --git a/netneurotools/stats/__init__.py b/netneurotools/stats/__init__.py new file mode 100644 index 0000000..c1fc422 --- /dev/null +++ b/netneurotools/stats/__init__.py @@ -0,0 +1,37 @@ +"""Functions for performing statistical operations.""" + + +from .correlation import ( + efficient_pearsonr, + weighted_pearsonr, + make_correlated_xy +) + + +from .permutation_test import ( + permtest_1samp, + permtest_rel, + permtest_pearsonr +) + + +from .regression import ( + _add_constant, + residualize, + get_dominance_stats +) + + +# from .stats_utils import () + + +__all__ = [ + # correlation + 'efficient_pearsonr', 'weighted_pearsonr', 'make_correlated_xy' + # permutation_test + 'permtest_1samp', 'permtest_rel', 'permtest_pearsonr', + # regression + '_add_constant', 'residualize', 'get_dominance_stats', + # stats_utils +] + diff --git a/netneurotools/datasets/generators.py b/netneurotools/stats/correlation.py similarity index 50% rename from netneurotools/datasets/generators.py rename to netneurotools/stats/correlation.py index 42c4f56..1a2773b 100644 --- a/netneurotools/datasets/generators.py +++ b/netneurotools/stats/correlation.py @@ -1,10 +1,105 @@ - -# -*- coding: utf-8 -*- -"""Functions for making "random" datasets.""" +"""Functions for calculating correlation.""" import numpy as np +import scipy.stats as sstats +import scipy.special as sspecial from sklearn.utils.validation import check_random_state +try: # scipy >= 1.8.0 + from scipy.stats._stats_py import _chk2_asarray +except ImportError: # scipy < 1.8.0 + from scipy.stats.stats import _chk2_asarray + +def efficient_pearsonr(a, b, ddof=1, nan_policy='propagate'): + """ + Compute correlation of matching columns in `a` and `b`. + + Parameters + ---------- + a,b : array_like + Sample observations. 
These arrays must have the same length and either + an equivalent number of columns or be broadcastable + ddof : int, optional + Degrees of freedom correction in the calculation of the standard + deviation. Default: 1 + nan_policy : bool, optional + Defines how to handle when input contains nan. 'propagate' returns nan, + 'raise' throws an error, 'omit' performs the calculations ignoring nan + values. Default: 'propagate' + + Returns + ------- + corr : float or numpy.ndarray + Pearson's correlation coefficient between matching columns of inputs + pval : float or numpy.ndarray + Two-tailed p-values + + Notes + ----- + If either input contains nan and nan_policy is set to 'omit', both arrays + will be masked to omit the nan entries. + + Examples + -------- + >>> from netneurotools import datasets, stats + + Generate some not-very-correlated and some highly-correlated data: + + >>> np.random.seed(12345678) # set random seed for reproducible results + >>> x1, y1 = datasets.make_correlated_xy(corr=0.1, size=100) + >>> x2, y2 = datasets.make_correlated_xy(corr=0.8, size=100) + + Calculate both correlations simultaneously: + + >>> stats.efficient_pearsonr(np.c_[x1, x2], np.c_[y1, y2]) + (array([0.10032565, 0.79961189]), array([3.20636135e-01, 1.97429944e-23])) + """ + a, b, axis = _chk2_asarray(a, b, 0) + if len(a) != len(b): + raise ValueError('Provided arrays do not have same length') + + if a.size == 0 or b.size == 0: + return np.nan, np.nan + + if nan_policy not in ('propagate', 'raise', 'omit'): + raise ValueError(f'Value for nan_policy "{nan_policy}" not allowed') + + a, b = a.reshape(len(a), -1), b.reshape(len(b), -1) + if (a.shape[1] != b.shape[1]): + a, b = np.broadcast_arrays(a, b) + + mask = np.logical_or(np.isnan(a), np.isnan(b)) + if nan_policy == 'raise' and np.any(mask): + raise ValueError('Input cannot contain NaN when nan_policy is "omit"') + elif nan_policy == 'omit': + # avoid making copies of the data, if possible + a = np.ma.masked_array(a, mask, copy=False, fill_value=np.nan) + b = np.ma.masked_array(b, mask, copy=False, fill_value=np.nan) + + with np.errstate(invalid='ignore'): + corr = (sstats.zscore(a, ddof=ddof, nan_policy=nan_policy) + * sstats.zscore(b, ddof=ddof, nan_policy=nan_policy)) + + sumfunc, n_obs = np.sum, len(a) + if nan_policy == 'omit': + corr = corr.filled(np.nan) + sumfunc = np.nansum + n_obs = np.squeeze(np.sum(np.logical_not(np.isnan(corr)), axis=0)) + + corr = sumfunc(corr, axis=0) / (n_obs - 1) + corr = np.squeeze(np.clip(corr, -1, 1)) / 1 + + # taken from scipy.stats + ab = (n_obs / 2) - 1 + prob = 2 * sspecial.btdtr(ab, ab, 0.5 * (1 - np.abs(corr))) + + return corr, prob + + +def weighted_pearsonr(): + """Calculate weighted Pearson correlation coefficient.""" + pass + def make_correlated_xy(corr=0.85, size=10000, seed=None, tol=0.001): """ diff --git a/netneurotools/stats/permutation_test.py b/netneurotools/stats/permutation_test.py new file mode 100644 index 0000000..9cb5d3c --- /dev/null +++ b/netneurotools/stats/permutation_test.py @@ -0,0 +1,282 @@ +"""Functions for calculating permutation test.""" + +import numpy as np +from sklearn.utils.validation import check_random_state + +try: # scipy >= 1.8.0 + from scipy.stats._stats_py import _chk2_asarray +except ImportError: # scipy < 1.8.0 + from scipy.stats.stats import _chk2_asarray + +from .correlation import efficient_pearsonr + +def permtest_1samp(a, popmean, axis=0, n_perm=1000, seed=0): + """ + Non-parametric equivalent of :py:func:`scipy.stats.ttest_1samp`. 
+ + Generates two-tailed p-value for hypothesis of whether `a` differs from + `popmean` using permutation tests + + Parameters + ---------- + a : array_like + Sample observations + popmean : float or array_like + Expected valued in null hypothesis. If array_like then it must have the + same shape as `a` excluding the `axis` dimension + axis : int or None, optional + Axis along which to compute test. If None, compute over the whole array + of `a`. Default: 0 + n_perm : int, optional + Number of permutations to assess. Unless `a` is very small along `axis` + this will approximate a randomization test via Monte Carlo simulations. + Default: 1000 + seed : {int, np.random.RandomState instance, None}, optional + Seed for random number generation. Set to None for "randomness". + Default: 0 + + Returns + ------- + stat : float or numpy.ndarray + Difference from `popmean` + pvalue : float or numpy.ndarray + Non-parametric p-value + + Notes + ----- + Providing multiple values to `popmean` to run *independent* tests in + parallel is not currently supported. + + The lowest p-value that can be returned by this function is equal to 1 / + (`n_perm` + 1). + + Examples + -------- + >>> from netneurotools import stats + >>> np.random.seed(7654567) # set random seed for reproducible results + >>> rvs = np.random.normal(loc=5, scale=10, size=(50, 2)) + + Test if mean of random sample is equal to true mean, and different mean. We + reject the null hypothesis in the second case and don't reject it in the + first case. + + >>> stats.permtest_1samp(rvs, 5.0) + (array([-0.985602 , -0.05204969]), array([0.48551449, 0.95904096])) + >>> stats.permtest_1samp(rvs, 0.0) + (array([4.014398 , 4.94795031]), array([0.00699301, 0.000999 ])) + + Example using axis and non-scalar dimension for population mean + + >>> stats.permtest_1samp(rvs, [5.0, 0.0]) + (array([-0.985602 , 4.94795031]), array([0.48551449, 0.000999 ])) + >>> stats.permtest_1samp(rvs.T, [5.0, 0.0], axis=1) + (array([-0.985602 , 4.94795031]), array([0.51548452, 0.000999 ])) + """ + a, popmean, axis = _chk2_asarray(a, popmean, axis) + rs = check_random_state(seed) + + if a.size == 0: + return np.nan, np.nan + + # ensure popmean will broadcast to `a` correctly + if popmean.ndim != a.ndim: + popmean = np.expand_dims(popmean, axis=axis) + + # center `a` around `popmean` and calculate original mean + zeroed = a - popmean + true_mean = zeroed.mean(axis=axis) / 1 + abs_mean = np.abs(true_mean) + + # this for loop is not _the fastest_ but is memory efficient + # the broadcasting alt. would mean storing zeroed.size * n_perm in memory + permutations = np.ones(true_mean.shape) + for _ in range(n_perm): + flipped = zeroed * rs.choice([-1, 1], size=zeroed.shape) # sign flip + permutations += np.abs(flipped.mean(axis=axis)) >= abs_mean + + pvals = permutations / (n_perm + 1) # + 1 in denom accounts for true_mean + + return true_mean, pvals + + +def permtest_rel(a, b, axis=0, n_perm=1000, seed=0): + """ + Non-parametric equivalent of :py:func:`scipy.stats.ttest_rel`. + + Generates two-tailed p-value for hypothesis of whether related samples `a` + and `b` differ using permutation tests + + Parameters + ---------- + a, b : array_like + Sample observations. These arrays must have the same shape. + axis : int or None, optional + Axis along which to compute test. If None, compute over whole arrays + of `a` and `b`. Default: 0 + n_perm : int, optional + Number of permutations to assess. 
Unless `a` and `b` are very small + along `axis` this will approximate a randomization test via Monte + Carlo simulations. Default: 1000 + seed : {int, np.random.RandomState instance, None}, optional + Seed for random number generation. Set to None for "randomness". + Default: 0 + + Returns + ------- + stat : float or numpy.ndarray + Average difference between `a` and `b` + pvalue : float or numpy.ndarray + Non-parametric p-value + + Notes + ----- + The lowest p-value that can be returned by this function is equal to 1 / + (`n_perm` + 1). + + Examples + -------- + >>> from netneurotools import stats + + >>> np.random.seed(12345678) # set random seed for reproducible results + >>> rvs1 = np.random.normal(loc=5, scale=10, size=500) + >>> rvs2 = (np.random.normal(loc=5, scale=10, size=500) + ... + np.random.normal(scale=0.2, size=500)) + >>> stats.permtest_rel(rvs1, rvs2) # doctest: +SKIP + (-0.16506275161572695, 0.8021978021978022) + + >>> rvs3 = (np.random.normal(loc=8, scale=10, size=500) + ... + np.random.normal(scale=0.2, size=500)) + >>> stats.permtest_rel(rvs1, rvs3) # doctest: +SKIP + (2.40533726097883, 0.000999000999000999) + """ + a, b, axis = _chk2_asarray(a, b, axis) + rs = check_random_state(seed) + + if a.shape[axis] != b.shape[axis]: + raise ValueError('Provided arrays do not have same length along axis') + + if a.size == 0 or b.size == 0: + return np.nan, np.nan + + # calculate original difference in means + ab = np.stack([a, b], axis=0) + if ab.ndim < 3: + ab = np.expand_dims(ab, axis=-1) + true_diff = np.squeeze(np.diff(ab, axis=0)).mean(axis=axis) / 1 + abs_true = np.abs(true_diff) + + # idx array + reidx = np.meshgrid(*[range(f) for f in ab.shape], indexing='ij') + + permutations = np.ones(true_diff.shape) + for _ in range(n_perm): + # use this to re-index (i.e., swap along) the first axis of `ab` + swap = rs.random_sample(ab.shape[:-1]).argsort(axis=axis) + reidx[0] = np.repeat(swap[..., np.newaxis], ab.shape[-1], axis=-1) + # recompute difference between `a` and `b` (i.e., first axis of `ab`) + pdiff = np.squeeze(np.diff(ab[tuple(reidx)], axis=0)).mean(axis=axis) + permutations += np.abs(pdiff) >= abs_true + + pvals = permutations / (n_perm + 1) # + 1 in denom accounts for true_diff + + return true_diff, pvals + + +def permtest_pearsonr(a, b, axis=0, n_perm=1000, resamples=None, seed=0): + """ + Non-parametric equivalent of :py:func:`scipy.stats.pearsonr`. + + Generates two-tailed p-value for hypothesis of whether samples `a` and `b` + are correlated using permutation tests + + Parameters + ---------- + a,b : (N[, M]) array_like + Sample observations. These arrays must have the same length and either + an equivalent number of columns or be broadcastable + axis : int or None, optional + Axis along which to compute test. If None, compute over whole arrays + of `a` and `b`. Default: 0 + n_perm : int, optional + Number of permutations to assess. Unless `a` and `b` are very small + along `axis` this will approximate a randomization test via Monte + Carlo simulations. Default: 1000 + resamples : (N, P) array_like, optional + Resampling array used to shuffle `a` when generating null distribution + of correlations. This array must have the same length as `a` and `b` + and should have at least the same number of columns as `n_perm` (if it + has more then only `n_perm` columns will be used. When not specified a + standard permutation is used to shuffle `a`. Default: None + seed : {int, np.random.RandomState instance, None}, optional + Seed for random number generation. 
Set to None for "randomness". + Default: 0 + + Returns + ------- + corr : float or numpyndarray + Correlations + pvalue : float or numpy.ndarray + Non-parametric p-value + + Notes + ----- + The lowest p-value that can be returned by this function is equal to 1 / + (`n_perm` + 1). + + Examples + -------- + >>> from netneurotools import datasets, stats + + >>> np.random.seed(12345678) # set random seed for reproducible results + >>> x, y = datasets.make_correlated_xy(corr=0.1, size=100) + >>> stats.permtest_pearsonr(x, y) # doctest: +SKIP + (0.10032564626876286, 0.3046953046953047) + + >>> x, y = datasets.make_correlated_xy(corr=0.5, size=100) + >>> stats.permtest_pearsonr(x, y) # doctest: +SKIP + (0.500040365781984, 0.000999000999000999) + + Also works with multiple columns by either broadcasting the smaller array + to the larger: + + >>> z = x + np.random.normal(loc=1, size=100) + >>> stats.permtest_pearsonr(x, np.column_stack([y, z])) + (array([0.50004037, 0.25843187]), array([0.000999 , 0.01098901])) + + or by using matching columns in the two arrays (e.g., `x` and `y` vs + `a` and `b`): + + >>> a, b = datasets.make_correlated_xy(corr=0.9, size=100) + >>> stats.permtest_pearsonr(np.column_stack([x, a]), np.column_stack([y, b])) + (array([0.50004037, 0.89927523]), array([0.000999, 0.000999])) + """ # noqa + a, b, axis = _chk2_asarray(a, b, axis) + rs = check_random_state(seed) + + if len(a) != len(b): + raise ValueError('Provided arrays do not have same length') + + if a.size == 0 or b.size == 0: + return np.nan, np.nan + + if resamples is not None: + if n_perm > resamples.shape[-1]: + raise ValueError('Number of permutations requested exceeds size ' + 'of resampling array.') + + # divide by one forces coercion to float if ndim = 0 + true_corr = efficient_pearsonr(a, b)[0] / 1 + abs_true = np.abs(true_corr) + + permutations = np.ones(true_corr.shape) + for perm in range(n_perm): + # permute `a` and determine whether correlations exceed original + if resamples is None: + ap = a[rs.permutation(len(a))] + else: + ap = a[resamples[:, perm]] + permutations += np.abs(efficient_pearsonr(ap, b)[0]) >= abs_true + + pvals = permutations / (n_perm + 1) # + 1 in denom accounts for true_corr + + return true_corr, pvals \ No newline at end of file diff --git a/netneurotools/stats/regression.py b/netneurotools/stats/regression.py new file mode 100644 index 0000000..13037af --- /dev/null +++ b/netneurotools/stats/regression.py @@ -0,0 +1,257 @@ +"""Functions for calculating regression.""" + +from itertools import combinations + +import numpy as np +from tqdm import tqdm +import scipy.stats as sstats +from joblib import Parallel, delayed +from sklearn.linear_model import LinearRegression +from sklearn.utils.validation import check_array + +def _add_constant(data): + """ + Add a constant (i.e., intercept) term to `data`. + + Parameters + ---------- + data : (N, M) array_like + Samples by features data array + + Returns + ------- + data : (N, F) np.ndarray + Where `F` is `M + 1` + + Examples + -------- + >>> from netneurotools import utils + + >>> A = np.zeros((5, 5)) + >>> Ac = utils.add_constant(A) + >>> Ac + array([[0., 0., 0., 0., 0., 1.], + [0., 0., 0., 0., 0., 1.], + [0., 0., 0., 0., 0., 1.], + [0., 0., 0., 0., 0., 1.], + [0., 0., 0., 0., 0., 1.]]) + """ + data = check_array(data, ensure_2d=False) + return np.column_stack([data, np.ones(len(data))]) + + +def residualize(X, Y, Xc=None, Yc=None, normalize=True, add_intercept=True): + """ + Return residuals of regression equation from `Y ~ X`. 
+ + Parameters + ---------- + X : (N[, R]) array_like + Coefficient matrix of `R` variables for `N` subjects + Y : (N[, F]) array_like + Dependent variable matrix of `F` variables for `N` subjects + Xc : (M[, R]) array_like, optional + Coefficient matrix of `R` variables for `M` subjects. If not specified + then `X` is used to estimate betas. Default: None + Yc : (M[, F]) array_like, optional + Dependent variable matrix of `F` variables for `M` subjects. If not + specified then `Y` is used to estimate betas. Default: None + normalize : bool, optional + Whether to normalize (i.e., z-score) residuals. Will use residuals from + `Yc ~ Xc` for generating mean and variance. Default: True + add_intercept : bool, optional + Whether to add intercept to `X` (and `Xc`, if provided). The intercept + will not be removed, just used in beta estimation. Default: True + + Returns + ------- + Yr : (N, F) numpy.ndarray + Residuals of `Y ~ X` + + Notes + ----- + If both `Xc` and `Yc` are provided, these are used to calculate betas which + are then applied to `X` and `Y`. + """ + if ((Yc is None and Xc is not None) or (Yc is not None and Xc is None)): + raise ValueError('If processing against a comparative group, you must ' + 'provide both `Xc` and `Yc`.') + + X, Y = np.asarray(X), np.asarray(Y) + + if Yc is None: + Xc, Yc = X.copy(), Y.copy() + else: + Xc, Yc = np.asarray(Xc), np.asarray(Yc) + + # add intercept to regressors if requested and calculate fit + if add_intercept: + X, Xc = _add_constant(X), _add_constant(Xc) + betas, *rest = np.linalg.lstsq(Xc, Yc, rcond=None) + + # remove intercept from regressors and betas for calculation of residuals + if add_intercept: + betas = betas[:-1] + X, Xc = X[:, :-1], Xc[:, :-1] + + # calculate residuals + Yr = Y - (X @ betas) + Ycr = Yc - (Xc @ betas) + + if normalize: + Yr = sstats.zmap(Yr, compare=Ycr) + + return Yr + + + + +def get_dominance_stats(X, y, use_adjusted_r_sq=True, verbose=False, n_jobs=1): + """ + Return the dominance analysis statistics for multilinear regression. + + This is a rewritten & simplified version of [DA1]_. It is briefly + tested against the original package, but still in early stages. + Please feel free to report any bugs. + + Warning: Still work-in-progress. Parameters might change! + + Parameters + ---------- + X : (N, M) array_like + Input data + y : (N,) array_like + Target values + use_adjusted_r_sq : bool, optional + Whether to use adjusted r squares. Default: True + verbose : bool, optional + Whether to print debug messages. Default: False + n_jobs : int, optional + The number of jobs to run in parallel. Default: 1 + + Returns + ------- + model_metrics : dict + The dominance metrics, currently containing `individual_dominance`, + `partial_dominance`, `total_dominance`, and `full_r_sq`. + model_r_sq : dict + Contains all model r squares + + Notes + ----- + Example usage + + .. code:: python + + from netneurotools.stats import get_dominance_stats + from sklearn.datasets import load_boston + X, y = load_boston(return_X_y=True) + model_metrics, model_r_sq = get_dominance_stats(X, y) + + To compare with [DA1]_, use `use_adjusted_r_sq=False` + + .. code:: python + + from dominance_analysis import Dominance_Datasets + from dominance_analysis import Dominance + boston_dataset=Dominance_Datasets.get_boston() + dominance_regression=Dominance(data=boston_dataset, + target='House_Price',objective=1) + incr_variable_rsquare=dominance_regression.incremental_rsquare() + dominance_regression.dominance_stats() + + References + ---------- + .. 
[DA1] https://github.com/dominance-analysis/dominance-analysis + + """ + # this helps to remove one element from a tuple + def remove_ret(tpl, elem): + lst = list(tpl) + lst.remove(elem) + return tuple(lst) + + # sklearn linear regression wrapper + def get_reg_r_sq(X, y, use_adjusted_r_sq=True): + lin_reg = LinearRegression() + lin_reg.fit(X, y) + yhat = lin_reg.predict(X) + SS_Residual = sum((y - yhat) ** 2) + SS_Total = sum((y - np.mean(y)) ** 2) + r_squared = 1 - (float(SS_Residual)) / SS_Total + adjusted_r_squared = 1 - (1 - r_squared) * \ + (len(y) - 1) / (len(y) - X.shape[1] - 1) + if use_adjusted_r_sq: + return adjusted_r_squared + else: + return r_squared + + # helper function to compute r_sq for a given idx_tuple + def compute_r_sq(idx_tuple): + return idx_tuple, get_reg_r_sq(X[:, idx_tuple], + y, + use_adjusted_r_sq=use_adjusted_r_sq) + + # generate all predictor combinations in list (num of predictors) of lists + n_predictor = X.shape[-1] + # n_comb_len_group = n_predictor - 1 + predictor_combs = [list(combinations(range(n_predictor), i)) + for i in range(1, n_predictor + 1)] + if verbose: + print(f"[Dominance analysis] Generated \ + {len([v for i in predictor_combs for v in i])} combinations") + + model_r_sq = dict() + results = Parallel(n_jobs=n_jobs)( + delayed(compute_r_sq)(idx_tuple) + for len_group in tqdm(predictor_combs, + desc='num-of-predictor loop', + disable=not verbose) + for idx_tuple in tqdm(len_group, + desc='insider loop', + disable=not verbose)) + + # extract r_sq from results + for idx_tuple, r_sq in results: + model_r_sq[idx_tuple] = r_sq + + if verbose: + print(f"[Dominance analysis] Acquired {len(model_r_sq)} r^2's") + + # getting all model metrics + model_metrics = dict([]) + + # individual dominance + individual_dominance = [] + for i_pred in range(n_predictor): + individual_dominance.append(model_r_sq[(i_pred,)]) + individual_dominance = np.array(individual_dominance).reshape(1, -1) + model_metrics["individual_dominance"] = individual_dominance + + # partial dominance + partial_dominance = [[] for _ in range(n_predictor - 1)] + for i_len in range(n_predictor - 1): + i_len_combs = list(combinations(range(n_predictor), i_len + 2)) + for j_node in range(n_predictor): + j_node_sel = [v for v in i_len_combs if j_node in v] + reduced_list = [remove_ret(comb, j_node) for comb in j_node_sel] + diff_values = [ + model_r_sq[j_node_sel[i]] - model_r_sq[reduced_list[i]] + for i in range(len(reduced_list))] + partial_dominance[i_len].append(np.mean(diff_values)) + + # save partial dominance + partial_dominance = np.array(partial_dominance) + model_metrics["partial_dominance"] = partial_dominance + # get total dominance + total_dominance = np.mean( + np.r_[individual_dominance, partial_dominance], axis=0) + # test and save total dominance + assert np.allclose(total_dominance.sum(), + model_r_sq[tuple(range(n_predictor))]), \ + "Sum of total dominance is not equal to full r square!" 
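# Illustrative, self-contained check of the decomposition property that the
# assertion above encodes: the general ("total") dominance weights partition
# the full-model r square. The data below are synthetic and the import path
# assumes `get_dominance_stats` is re-exported from `netneurotools.stats`.
import numpy as np
from netneurotools.stats import get_dominance_stats

_rng = np.random.default_rng(0)
_X = _rng.standard_normal((200, 4))
_y = _X @ np.array([1.0, 0.5, 0.0, -0.25]) + _rng.standard_normal(200)
_metrics, _ = get_dominance_stats(_X, _y)
assert np.isclose(_metrics["total_dominance"].sum(), _metrics["full_r_sq"])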
+ model_metrics["total_dominance"] = total_dominance + # save full r^2 + model_metrics["full_r_sq"] = model_r_sq[tuple(range(n_predictor))] + + return model_metrics, model_r_sq diff --git a/netneurotools/stats/stats_utils.py b/netneurotools/stats/stats_utils.py new file mode 100644 index 0000000..2d16837 --- /dev/null +++ b/netneurotools/stats/stats_utils.py @@ -0,0 +1,3 @@ +"""Functions for supporting statistics.""" + + diff --git a/netneurotools/stats/tests/__init__.py b/netneurotools/stats/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/netneurotools/stats/tests/test_correlation.py b/netneurotools/stats/tests/test_correlation.py new file mode 100644 index 0000000..725bcae --- /dev/null +++ b/netneurotools/stats/tests/test_correlation.py @@ -0,0 +1,66 @@ +"""For testing netneurotools.stats.correlation functionality.""" + +import pytest +import numpy as np +from netneurotools import stats + +@pytest.mark.parametrize('x, y, expected', [ + # basic one-dimensional input + (range(5), range(5), (1.0, 0.0)), + # broadcasting occurs regardless of input order + (np.stack([range(5), range(5, 0, -1)], 1), range(5), + ([1.0, -1.0], [0.0, 0.0])), + (range(5), np.stack([range(5), range(5, 0, -1)], 1), + ([1.0, -1.0], [0.0, 0.0])), + # correlation between matching columns + (np.stack([range(5), range(5, 0, -1)], 1), + np.stack([range(5), range(5, 0, -1)], 1), + ([1.0, 1.0], [0.0, 0.0])) +]) +def test_efficient_pearsonr(x, y, expected): + """Test efficient_pearsonr function.""" + assert np.allclose(stats.efficient_pearsonr(x, y), expected) + + +def test_efficient_pearsonr_errors(): + """Test efficient_pearsonr function errors.""" + with pytest.raises(ValueError): + stats.efficient_pearsonr(range(4), range(5)) + + assert all(np.isnan(a) for a in stats.efficient_pearsonr([], [])) + + +@pytest.mark.parametrize('corr, size, tol, seed', [ + (0.85, (1000,), 0.05, 1234), + (0.85, (1000, 1000), 0.05, 1234), + ([[1, 0.5, 0.3], [0.5, 1, 0], [0.3, 0, 1]], (1000,), 0.05, 1234) +]) +def test_make_correlated_xy(corr, size, tol, seed): + """Test make_correlated_xy function.""" + out = stats.make_correlated_xy(corr=corr, size=size, + tol=tol, seed=seed) + # ensure output is expected shape + assert out.shape[1:] == size + assert len(out) == len(corr) if hasattr(corr, '__len__') else 2 + + # check outputs are correlated within specified tolerance + realcorr = np.corrcoef(out.reshape(len(out), -1)) + if len(realcorr) == 2 and not hasattr(corr, '__len__'): + realcorr = realcorr[0, 1] + assert np.all(np.abs(realcorr - corr) < tol) + + # check that seed generates reproducible values + duplicate = stats.make_correlated_xy(corr=corr, size=size, + tol=tol, seed=seed) + assert np.allclose(out, duplicate) + + +@pytest.mark.parametrize('corr', [ + (1.5), (-1.5), # outside range of [-1, 1] + ([0.85]), ([[0.5, 0.5, 0.5], [0.5, 0.5, 0.5]]), # not 2D / square array + ([[0.85]]), ([[1, 0.5], [0.5, 0.5]]) # diagonal not equal to 1 +]) +def test_make_correlated_xy_errors(corr): + """Test make_correlated_xy function errors.""" + with pytest.raises(ValueError): + stats.make_correlated_xy(corr) diff --git a/netneurotools/stats/tests/test_permutation.py b/netneurotools/stats/tests/test_permutation.py new file mode 100644 index 0000000..71411b0 --- /dev/null +++ b/netneurotools/stats/tests/test_permutation.py @@ -0,0 +1,65 @@ +"""For testing netneurotools.stats.permutation_test functionality.""" + +import pytest +import numpy as np +from netneurotools import stats + +@pytest.mark.xfail +def test_permtest_1samp(): + 
"""Test permutation test for one-sample t-test.""" + assert False + # n1, n2, n3 = 10, 15, 20 + # rs = np.random.RandomState(1234) + # rvn1 = rs.normal(loc=8, scale=10, size=(n1, n2, n3)) + + # t1, p1 = stats.permtest_1samp(rvn1, 1, axis=0) + + +def test_permtest_rel(): + """Test permutation test for paired samples.""" + dr, pr = -0.0005, 0.4175824175824176 + dpr = ([dr, -dr], [pr, pr]) + + rvs1 = np.linspace(1, 100, 100) + rvs2 = np.linspace(1.01, 99.989, 100) + rvs1_2D = np.array([rvs1, rvs2]) + rvs2_2D = np.array([rvs2, rvs1]) + + # the p-values in these two cases should be consistent + d, p = stats.permtest_rel(rvs1, rvs2, axis=0, seed=1234) + assert np.allclose([d, p], (dr, pr)) + d, p = stats.permtest_rel(rvs1_2D.T, rvs2_2D.T, axis=0, seed=1234) + assert np.allclose([d, p], dpr) + + # but the p-value will differ here because of _how_ we're drawing the + # random permutations... it would be nice if this was consistent, but as + # yet i don't have a great idea on how to make that happen without assuming + # a whole lot about the data + pr = 0.51248751 + tpr = ([dr, -dr], [pr, pr]) + d, p = stats.permtest_rel(rvs1_2D, rvs2_2D, axis=1, seed=1234) + assert np.allclose([d, p], tpr) + + +def test_permtest_pearsonr(): + """Test permutation test for Pearson correlation.""" + np.random.seed(12345678) + x, y = stats.make_correlated_xy(corr=0.1, size=100) + r, p = stats.permtest_pearsonr(x, y) + assert np.allclose([r, p], [0.10032564626876286, 0.3046953046953047]) + + x, y = stats.make_correlated_xy(corr=0.5, size=100) + r, p = stats.permtest_pearsonr(x, y) + assert np.allclose([r, p], [0.500040365781984, 0.000999000999000999]) + + z = x + np.random.normal(loc=1, size=100) + r, p = stats.permtest_pearsonr(x, np.column_stack([y, z])) + assert np.allclose(r, np.array([0.50004037, 0.25843187])) + assert np.allclose(p, np.array([0.000999, 0.01098901])) + + a, b = stats.make_correlated_xy(corr=0.9, size=100) + r, p = stats.permtest_pearsonr(np.column_stack([x, a]), + np.column_stack([y, b])) + assert np.allclose(r, np.array([0.50004037, 0.89927523])) + assert np.allclose(p, np.array([0.000999, 0.000999])) + diff --git a/netneurotools/stats/tests/test_regression.py b/netneurotools/stats/tests/test_regression.py new file mode 100644 index 0000000..bc8b7ed --- /dev/null +++ b/netneurotools/stats/tests/test_regression.py @@ -0,0 +1,14 @@ +"""For testing netneurotools.stats.regression functionality.""" + +import numpy as np +from netneurotools import stats + + +def test_add_constant(): + """Test adding a constant to a 1D or 2D array.""" + # if provided a vector it will return a 2D array + assert stats._add_constant(np.random.rand(100)).shape == (100, 2) + + # if provided a 2D array it will return the same, extended by 1 column + out = stats._add_constant(np.random.rand(100, 100)) + assert out.shape == (100, 101) and np.all(out[:, -1] == 1) \ No newline at end of file diff --git a/netneurotools/surface.py b/netneurotools/surface.py deleted file mode 100644 index e43ca16..0000000 --- a/netneurotools/surface.py +++ /dev/null @@ -1,189 +0,0 @@ -"""Functions for constructing graphs from surface meshes.""" - -import numpy as np -from scipy import sparse - - -def _get_edges(faces): - """ - Get set of edges from `faces`. 
- - Parameters - ---------- - faces : (F, 3) array_like - Set of indices creating triangular faces of a mesh - - Returns - ------- - edges : (F*3, 2) array_like - All edges in `faces` - """ - faces = np.asarray(faces) - edges = np.sort(faces[:, [0, 1, 1, 2, 2, 0]].reshape((-1, 2)), axis=1) - - return edges - - -def get_direct_edges(vertices, faces): - """ - Get (unique) direct edges and weights in mesh describes by inputs. - - Parameters - ---------- - vertices : (N, 3) array_like - Coordinates of `vertices` comprising mesh with `faces` - faces : (F, 3) array_like - Indices of `vertices` that compose triangular faces of mesh - - Returns - ------- - edges : (E, 2) array_like - Indices of `vertices` comprising direct edges (without duplicates) - weights : (E, 1) array_like - Distances between `edges` - - """ - edges = np.unique(_get_edges(faces), axis=0) - weights = np.linalg.norm(np.diff(vertices[edges], axis=1), axis=-1) - return edges, weights.squeeze() - - -def get_indirect_edges(vertices, faces): - """ - Get indirect edges and weights in mesh described by inputs. - - Indirect edges are between two vertices that participate in faces sharing - an edge - - Parameters - ---------- - vertices : (N, 3) array_like - Coordinates of `vertices` comprising mesh with `faces` - faces : (F, 3) array_like - Indices of `vertices` that compose triangular faces of mesh - - Returns - ------- - edges : (E, 2) array_like - Indices of `vertices` comprising indirect edges (without duplicates) - weights : (E, 1) array_like - Distances between `edges` on surface - - References - ---------- - https://github.com/mikedh/trimesh (MIT licensed) - - """ - # first generate the list of edges for the provided faces and the - # index for which face the edge is from (which is just the index of the - # face repeated thrice, since each face generates three direct edges) - edges = _get_edges(faces) - edges_face = np.repeat(np.arange(len(faces)), 3) - - # every edge appears twice in a watertight surface, so we'll first get the - # indices for each duplicate edge in `edges` (this should, assuming all - # goes well, have rows equal to len(edges) // 2) - order = np.lexsort(edges.T[::-1]) - edges_sorted = edges[order] - dupe = np.any(edges_sorted[1:] != edges_sorted[:-1], axis=1) - dupe_idx = np.append(0, np.nonzero(dupe)[0] + 1) - start_ok = np.diff(np.concatenate((dupe_idx, [len(edges_sorted)]))) == 2 - groups = np.tile(dupe_idx[start_ok].reshape(-1, 1), 2) - edge_groups = order[groups + np.arange(2)] - - # now, get the indices of the faces that participate in these duplicate - # edges, as well as the edges themselves - adjacency = edges_face[edge_groups] - nondegenerate = adjacency[:, 0] != adjacency[:, 1] - adjacency = np.sort(adjacency[nondegenerate], axis=1) - adjacency_edges = edges[edge_groups[:, 0][nondegenerate]] - - # the non-shared vertex index is the same shape as adjacency, holding - # vertex indices vs face indices - indirect_edges = np.zeros(adjacency.shape, dtype=np.int32) - 1 - - # loop through the two columns of adjacency - for i, fid in enumerate(adjacency.T): - # faces from the current column of adjacency - face = faces[fid] - # get index of vertex not included in shared edge - unshared = np.logical_not(np.logical_or( - face == adjacency_edges[:, 0].reshape(-1, 1), - face == adjacency_edges[:, 1].reshape(-1, 1))) - # each row should have one "uncontained" vertex; ignore degenerates - row_ok = unshared.sum(axis=1) == 1 - unshared[~row_ok, :] = False - indirect_edges[row_ok, i] = face[unshared] - - # get 
vertex coordinates of triangles pairs with shared edges, ordered - # such that the non-shared vertex is always _last_ among the trio - shared = np.sort(face[np.logical_not(unshared)].reshape(-1, 1, 2), axis=-1) - shared = np.repeat(shared, 2, axis=1) - triangles = np.concatenate((shared, indirect_edges[..., None]), axis=-1) - # `A.shape`: (3, N, 2) corresponding to (xyz coords, edges, triangle pairs) - A, B, V = vertices[triangles].transpose(2, 3, 0, 1) - - # calculate the xyz coordinates of the foot of each triangle, where the - # base is the shared edge - # that is, we're trying to calculate F in the equation `VF = VB - (w * BA)` - # where `VF`, `VB`, and `BA` are vectors, and `w = (AB * VB) / (AB ** 2)` - w = (np.sum((A - B) * (V - B), axis=0, keepdims=True) - / np.sum((A - B) ** 2, axis=0, keepdims=True)) - feet = B - (w * (B - A)) - # calculate coordinates of midpoint b/w the feet of each pair of triangles - midpoints = (np.sum(feet.transpose(1, 2, 0), axis=1) / 2)[:, None] - # calculate Euclidean distance between non-shared vertices and midpoints - # and add distances together for each pair of triangles - norms = np.linalg.norm(vertices[indirect_edges] - midpoints, axis=-1) - weights = np.sum(norms, axis=-1) - - # NOTE: weights won't be perfectly accurate for a small subset of triangle - # pairs where either triangle has angle >90 along the shared edge. in these - # the midpoint lies _outside_ the shared edge, so neighboring triangles - # would need to be taken into account. that said, this occurs in only a - # minority of cases and the difference tends to be in the ~0.001 mm range - return indirect_edges, weights - - -def make_surf_graph(vertices, faces, mask=None): - """ - Construct adjacency graph from `surf`. - - Parameters - ---------- - vertices : (N, 3) array_like - Coordinates of `vertices` comprising mesh with `faces` - faces : (F, 3) array_like - Indices of `vertices` that compose triangular faces of mesh - mask : (N,) array_like, optional (default None) - Boolean mask indicating which vertices should be removed from generated - graph. If not supplied, all vertices are used. 
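    A toy usage sketch (the two-triangle mesh is purely illustrative, and the
    import path refers to this module):

    .. code:: python

        import numpy as np
        from scipy.sparse import csgraph

        from netneurotools.surface import make_surf_graph

        # unit square split into two triangles that share the edge (0, 2)
        vertices = np.array([[0., 0., 0.],
                             [1., 0., 0.],
                             [1., 1., 0.],
                             [0., 1., 0.]])
        faces = np.array([[0, 1, 2],
                          [0, 2, 3]])
        graph = make_surf_graph(vertices, faces)
        # shortest paths along direct + indirect mesh edges
        dists = csgraph.dijkstra(graph, directed=False)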
- - Returns - ------- - graph : scipy.sparse.csr_matrix - Sparse matrix representing graph of `vertices` and `faces` - - Raises - ------ - ValueError : inconsistent number of vertices in `mask` and `vertices` - """ - if mask is not None and len(mask) != len(vertices): - raise ValueError('Supplied `mask` array has different number of ' - 'vertices than supplied `vertices`.') - - # get all (direct + indirect) edges from surface - direct_edges, direct_weights = get_direct_edges(vertices, faces) - indirect_edges, indirect_weights = get_indirect_edges(vertices, faces) - edges = np.vstack((direct_edges, indirect_edges)) - weights = np.hstack((direct_weights, indirect_weights)) - - # remove edges that include a vertex in `mask` - if mask is not None: - idx, = np.where(mask) - mask = ~np.any(np.isin(edges, idx), axis=1) - edges, weights = edges[mask], weights[mask] - - # construct our graph on which to calculate shortest paths - return sparse.csr_matrix((np.squeeze(weights), (edges[:, 0], edges[:, 1])), - shape=(len(vertices), len(vertices))) diff --git a/netneurotools/tests/test_civet.py b/netneurotools/tests/test_civet.py deleted file mode 100644 index 9a6a1ab..0000000 --- a/netneurotools/tests/test_civet.py +++ /dev/null @@ -1,30 +0,0 @@ -# -*- coding: utf-8 -*- -"""For testing netneurotools.civet functionality.""" - -import numpy as np -import pytest - -from netneurotools import civet, datasets - - -@pytest.fixture(scope='module') -def civet_surf(tmp_path_factory): - tmpdir = str(tmp_path_factory.getbasetemp()) - return datasets.fetch_civet(data_dir=tmpdir, verbose=0)['mid'] - - -def test_read_civet(civet_surf): - vertices, triangles = civet.read_civet(civet_surf.lh) - assert len(vertices) == 40962 - assert len(triangles) == 81920 - assert np.all(triangles.max(axis=0) < vertices.shape[0]) - - -def test_civet_to_freesurfer(): - brainmap = np.random.rand(81924) - out = civet.civet_to_freesurfer(brainmap) - out2 = civet.civet_to_freesurfer(brainmap, method='linear') - assert out.shape[0] == out2.shape[0] == 81924 - - with pytest.raises(ValueError): - civet.civet_to_freesurfer(np.random.rand(10)) diff --git a/netneurotools/tests/test_freesurfer.py b/netneurotools/tests/test_freesurfer.py deleted file mode 100644 index 53bf74e..0000000 --- a/netneurotools/tests/test_freesurfer.py +++ /dev/null @@ -1,82 +0,0 @@ -# -*- coding: utf-8 -*- -"""For testing netneurotools.freesurfer functionality.""" - -import numpy as np -import pytest - -from netneurotools import datasets, freesurfer - - -@pytest.fixture(scope='module') -def cammoun_surf(tmp_path_factory): - tmpdir = str(tmp_path_factory.getbasetemp()) - return datasets.fetch_cammoun2012('fsaverage5', data_dir=tmpdir, verbose=0) - - -@pytest.mark.parametrize('method', [ - 'average', 'surface', 'geodesic' -]) -@pytest.mark.parametrize('scale, parcels, n_right', [ - ('scale033', 68, 34), - ('scale060', 114, 57), - ('scale125', 219, 108), - ('scale250', 448, 223), - ('scale500', 1000, 501), -]) -def test_find_parcel_centroids(cammoun_surf, scale, parcels, n_right, method): - lh, rh = cammoun_surf[scale] - - coords, hemi = freesurfer.find_parcel_centroids(lhannot=lh, rhannot=rh, - method=method, - version='fsaverage5') - assert len(coords) == parcels - assert len(hemi) == parcels - assert np.sum(hemi) == n_right - - -@pytest.mark.parametrize('scale, parcels', [ - ('scale033', 68), - ('scale060', 114), - ('scale125', 219), - ('scale250', 448), - ('scale500', 1000), -]) -def test_project_reduce_vertices(cammoun_surf, scale, parcels): - # these functions are 
partners and should be tested in concert. - # we can test all the normal functionality and also ensure that "round - # trips" work as expected - - # generate "parcellated" data - data = np.random.rand(parcels) - lh, rh = cammoun_surf[scale] - - # do we get the expected number of vertices in our projection? - projected = freesurfer.parcels_to_vertices(data, rhannot=rh, lhannot=lh) - assert len(projected) == 20484 - - # does reduction return our input data, as expected? - reduced = freesurfer.vertices_to_parcels(projected, rhannot=rh, lhannot=lh) - assert np.allclose(data, reduced) - - # can we do this with multi-dimensional data, too? - data = np.random.rand(parcels, 2) - projected = freesurfer.parcels_to_vertices(data, rhannot=rh, lhannot=lh) - assert projected.shape == (20484, 2) - reduced = freesurfer.vertices_to_parcels(projected, rhannot=rh, lhannot=lh) - assert np.allclose(data, reduced) - - # what about int arrays as input? - data = np.random.choice(10, size=parcels) - projected = freesurfer.parcels_to_vertices(data, rhannot=rh, lhannot=lh) - reduced = freesurfer.vertices_to_parcels(projected, rhannot=rh, lhannot=lh) - assert np.allclose(reduced, data) - - # number of parcels != annotation spec - with pytest.raises(ValueError): - freesurfer.parcels_to_vertices(np.random.rand(parcels + 1), - rhannot=rh, lhannot=lh) - - # number of vertices != annotation spec - with pytest.raises(ValueError): - freesurfer.vertices_to_parcels(np.random.rand(20485), - rhannot=rh, lhannot=lh) diff --git a/netneurotools/tests/test_modularity.py b/netneurotools/tests/test_modularity.py deleted file mode 100644 index 4018ce0..0000000 --- a/netneurotools/tests/test_modularity.py +++ /dev/null @@ -1,44 +0,0 @@ -# -*- coding: utf-8 -*- -"""For testing netneurotools.modularity functionality.""" - -import numpy as np - -from netneurotools import modularity - -rs = np.random.RandomState(1234) - - -def test_dummyvar(): - # generate small example dummy variable code - out = modularity._dummyvar(np.array([1, 1, 2, 3, 3])) - assert np.all(out == np.array([[1, 0, 0], - [1, 0, 0], - [0, 1, 0], - [0, 0, 1], - [0, 0, 1]])) - - allones = np.array([1, 1, 1, 1, 1, 1, 1, 1]) - assert np.all(modularity._dummyvar(allones) == allones) - - -def test_zrand(): - # make the same two-group community assignments (with different labels) - label = np.ones((100, 1)) - X, Y = np.vstack((label, label * 2)), np.vstack((label * 2, label)) - # compare - assert modularity.zrand(X, Y) == modularity.zrand(X, Y[::-1]) - random = rs.choice([0, 1], size=X.shape) - assert modularity.zrand(X, Y) > modularity.zrand(X, random) - assert modularity.zrand(X, Y) == modularity.zrand(X[:, 0], Y[:, 0]) - - -def test_zrand_partitions(): - # make random communities - comm = rs.choice(range(6), size=(10, 100)) - all_diff = modularity._zrand_partitions(comm) - all_same = modularity._zrand_partitions(np.repeat(comm[:, [0]], 10, axis=1)) - - # partition of labels that are all the same should have higher average - # zrand and lower stdev zrand - assert np.nanmean(all_same) > np.nanmean(all_diff) - assert np.nanstd(all_same) < np.nanstd(all_diff) diff --git a/netneurotools/tests/test_plotting.py b/netneurotools/tests/test_plotting.py deleted file mode 100644 index de35811..0000000 --- a/netneurotools/tests/test_plotting.py +++ /dev/null @@ -1,58 +0,0 @@ -# -*- coding: utf-8 -*- -"""For testing netneurotools.plotting functionality.""" - -import matplotlib.pyplot as plt -import numpy as np - -from netneurotools import datasets, plotting -import pytest - - -def 
test_grid_communities(): - comms = np.asarray([0, 0, 0, 0, 1, 1, 1, 1, 2, 2]) - # check that comms with / without 0 community label yields same output - assert np.allclose(plotting._grid_communities(comms), [0, 4, 8, 10]) - assert np.allclose(plotting._grid_communities(comms + 1), [0, 4, 8, 10]) - - -def test_sort_communities(): - data = np.arange(9).reshape(3, 3) - comms = np.asarray([0, 0, 2]) - # check that comms with / without 0 community label yields same output - assert np.allclose(plotting.sort_communities(data, comms), [1, 0, 2]) - assert np.allclose(plotting.sort_communities(data, comms + 1), [1, 0, 2]) - - -def test_plot_mod_heatmap(): - data = np.random.rand(100, 100) - comms = np.random.choice(4, size=(100,)) - ax = plotting.plot_mod_heatmap(data, comms) - assert isinstance(ax, plt.Axes) - - -@pytest.mark.filterwarnings('ignore') -def test_plot_fsvertex(): - surfer = pytest.importorskip('surfer') - - data = np.random.rand(20484) - brain = plotting.plot_fsvertex(data, subject_id='fsaverage5', - offscreen=True) - assert isinstance(brain, surfer.Brain) - - -@pytest.mark.filterwarnings('ignore') -def test_plot_fsaverage(): - surfer = pytest.importorskip('surfer') - - data = np.random.rand(68) - lhannot, rhannot = datasets.fetch_cammoun2012('fsaverage5')['scale033'] - brain = plotting.plot_fsaverage(data, lhannot=lhannot, rhannot=rhannot, - subject_id='fsaverage5', offscreen=True) - assert isinstance(brain, surfer.Brain) - - -def test_plot_point_brain(): - data = np.random.rand(100) - coords = np.random.rand(100, 3) - out = plotting.plot_point_brain(data, coords) - assert isinstance(out, plt.Figure) diff --git a/netneurotools/tests/test_stats.py b/netneurotools/tests/test_stats.py deleted file mode 100644 index 8730bfc..0000000 --- a/netneurotools/tests/test_stats.py +++ /dev/null @@ -1,172 +0,0 @@ -# -*- coding: utf-8 -*- -"""For testing netneurotools.stats functionality.""" - -import itertools -import numpy as np -import pytest - -from netneurotools import datasets, stats - - -@pytest.mark.xfail -def test_permtest_1samp(): - assert False - # n1, n2, n3 = 10, 15, 20 - # rs = np.random.RandomState(1234) - # rvn1 = rs.normal(loc=8, scale=10, size=(n1, n2, n3)) - - # t1, p1 = stats.permtest_1samp(rvn1, 1, axis=0) - - -def test_permtest_rel(): - dr, pr = -0.0005, 0.4175824175824176 - dpr = ([dr, -dr], [pr, pr]) - - rvs1 = np.linspace(1, 100, 100) - rvs2 = np.linspace(1.01, 99.989, 100) - rvs1_2D = np.array([rvs1, rvs2]) - rvs2_2D = np.array([rvs2, rvs1]) - - # the p-values in these two cases should be consistent - d, p = stats.permtest_rel(rvs1, rvs2, axis=0, seed=1234) - assert np.allclose([d, p], (dr, pr)) - d, p = stats.permtest_rel(rvs1_2D.T, rvs2_2D.T, axis=0, seed=1234) - assert np.allclose([d, p], dpr) - - # but the p-value will differ here because of _how_ we're drawing the - # random permutations... 
it would be nice if this was consistent, but as - # yet i don't have a great idea on how to make that happen without assuming - # a whole lot about the data - pr = 0.51248751 - tpr = ([dr, -dr], [pr, pr]) - d, p = stats.permtest_rel(rvs1_2D, rvs2_2D, axis=1, seed=1234) - assert np.allclose([d, p], tpr) - - -def test_permtest_pearsonr(): - np.random.seed(12345678) - x, y = datasets.make_correlated_xy(corr=0.1, size=100) - r, p = stats.permtest_pearsonr(x, y) - assert np.allclose([r, p], [0.10032564626876286, 0.3046953046953047]) - - x, y = datasets.make_correlated_xy(corr=0.5, size=100) - r, p = stats.permtest_pearsonr(x, y) - assert np.allclose([r, p], [0.500040365781984, 0.000999000999000999]) - - z = x + np.random.normal(loc=1, size=100) - r, p = stats.permtest_pearsonr(x, np.column_stack([y, z])) - assert np.allclose(r, np.array([0.50004037, 0.25843187])) - assert np.allclose(p, np.array([0.000999, 0.01098901])) - - a, b = datasets.make_correlated_xy(corr=0.9, size=100) - r, p = stats.permtest_pearsonr(np.column_stack([x, a]), - np.column_stack([y, b])) - assert np.allclose(r, np.array([0.50004037, 0.89927523])) - assert np.allclose(p, np.array([0.000999, 0.000999])) - - -@pytest.mark.parametrize('x, y, expected', [ - # basic one-dimensional input - (range(5), range(5), (1.0, 0.0)), - # broadcasting occurs regardless of input order - (np.stack([range(5), range(5, 0, -1)], 1), range(5), - ([1.0, -1.0], [0.0, 0.0])), - (range(5), np.stack([range(5), range(5, 0, -1)], 1), - ([1.0, -1.0], [0.0, 0.0])), - # correlation between matching columns - (np.stack([range(5), range(5, 0, -1)], 1), - np.stack([range(5), range(5, 0, -1)], 1), - ([1.0, 1.0], [0.0, 0.0])) -]) -def test_efficient_pearsonr(x, y, expected): - assert np.allclose(stats.efficient_pearsonr(x, y), expected) - - -def test_efficient_pearsonr_errors(): - with pytest.raises(ValueError): - stats.efficient_pearsonr(range(4), range(5)) - - assert all(np.isnan(a) for a in stats.efficient_pearsonr([], [])) - - -def test_gen_rotation(): - # make a few rotations (some same / different) - rout1, lout1 = stats._gen_rotation(seed=1234) - rout2, lout2 = stats._gen_rotation(seed=1234) - rout3, lout3 = stats._gen_rotation(seed=5678) - - # confirm consistency with the same seed - assert np.allclose(rout1, rout2) and np.allclose(lout1, lout2) - - # confirm inconsistency with different seeds - assert not np.allclose(rout1, rout3) and not np.allclose(lout1, lout3) - - # confirm reflection across L/R hemispheres as expected - # also confirm min/max never exceeds -1/1 - reflected = np.array([[1, -1, -1], [-1, 1, 1], [-1, 1, 1]]) - for r, l in zip([rout1, rout3], [lout1, lout3]): # noqa: E741 - assert np.allclose(r / l, reflected) - assert r.max() < 1 and r.min() > -1 and l.max() < 1 and l.min() > -1 - - -def _get_sphere_coords(s, t, r=1): - """Get coordinates at angles `s` and `t` a sphere of radius `r`.""" - # convert to radians - rad = np.pi / 180 - s, t = s * rad, t * rad - - # calculate new points - x = r * np.cos(s) * np.sin(t) - y = r * np.sin(s) * np.cos(t) - z = r * np.cos(t) - - return x, y, z - - -def test_gen_spinsamples(): - # grab a few points from a spherical surface and duplicate it for the - # "other hemisphere" - coords = [_get_sphere_coords(s, t, r=1) for s, t in - itertools.product(range(0, 360, 45), range(0, 360, 45))] - coords = np.vstack([coords, coords]) - hemi = np.hstack([np.zeros(len(coords) // 2), np.ones(len(coords) // 2)]) - - # generate "normal" test spins - spins, cost = stats.gen_spinsamples(coords, hemi, n_rotate=10, 
seed=1234, - return_cost=True) - assert spins.shape == spins.shape == (len(coords), 10) - - # confirm that `method` parameter functions as desired - for method in ['vasa', 'hungarian']: - spin_exact, cost_exact = stats.gen_spinsamples(coords, hemi, - n_rotate=10, seed=1234, - method=method, - return_cost=True) - assert spin_exact.shape == cost.shape == (len(coords), 10) - for s in spin_exact.T: - assert len(np.unique(s)) == len(s) - - # check that one hemisphere works - mask = hemi == 0 - spins, cost = stats.gen_spinsamples(coords[mask], hemi[mask], n_rotate=10, - seed=1234, return_cost=True) - assert spins.shape == cost.shape == (len(coords[mask]), 10) - - # confirm that check_duplicates will raise warnings - # since spins aren't exact permutations we need to use 4C4 with repeats - # and then perform one more rotation than that number (i.e., 35 + 1) - with pytest.warns(UserWarning): - i = [0, 1, -2, -1] # only grab a few coordinates - stats.gen_spinsamples(coords[i], hemi[i], n_rotate=36, seed=1234) - - # non-3D coords - with pytest.raises(ValueError): - stats.gen_spinsamples(coords[:, :2], hemi) - - # non-1D hemi - with pytest.raises(ValueError): - stats.gen_spinsamples(coords, np.column_stack([hemi, hemi])) - - # different length coords and hemi - with pytest.raises(ValueError): - stats.gen_spinsamples(coords, hemi[:-1]) diff --git a/netneurotools/tests/test_utils.py b/netneurotools/tests/test_utils.py deleted file mode 100644 index 1ac6b91..0000000 --- a/netneurotools/tests/test_utils.py +++ /dev/null @@ -1,47 +0,0 @@ -# -*- coding: utf-8 -*- -"""For testing netneurotools.utils functionality.""" - -import numpy as np -import pytest - -from netneurotools import datasets, utils - - -def test_add_constant(): - # if provided a vector it will return a 2D array - assert utils.add_constant(np.random.rand(100)).shape == (100, 2) - - # if provided a 2D array it will return the same, extended by 1 column - out = utils.add_constant(np.random.rand(100, 100)) - assert out.shape == (100, 101) and np.all(out[:, -1] == 1) - - -def test_add_triu(): - arr = np.arange(9).reshape(3, 3) - assert np.all(utils.get_triu(arr) == np.array([1, 2, 5])) - assert np.all(utils.get_triu(arr, k=0) == np.array([0, 1, 2, 4, 5, 8])) - - -@pytest.mark.parametrize('scale, expected', [ - ('scale033', 83), - ('scale060', 129), - ('scale125', 234), - ('scale250', 463), - ('scale500', 1015) -]) -def test_get_centroids(tmpdir, scale, expected): - # fetch test dataset - cammoun = datasets.fetch_cammoun2012('MNI152NLin2009aSym', data_dir=tmpdir, - verbose=0) - - ijk = utils.get_centroids(cammoun[scale]) - xyz = utils.get_centroids(cammoun[scale], image_space=True) - - # we get expected shape regardless of requested coordinate space - assert ijk.shape == xyz.shape == (expected, 3) - # ijk is all positive (i.e., cartesian) coordinates - assert np.all(ijk > 0) - - # requesting specific labels gives us a subset of the full `ijk` - lim = utils.get_centroids(cammoun[scale], labels=[1, 2, 3]) - assert np.all(lim == ijk[:3]) diff --git a/netneurotools/utils.py b/netneurotools/utils.py deleted file mode 100644 index 2d8839e..0000000 --- a/netneurotools/utils.py +++ /dev/null @@ -1,243 +0,0 @@ -# -*- coding: utf-8 -*- -"""Miscellaneous functions of various utility.""" - -import glob -import os -import subprocess - -import nibabel as nib -import numpy as np -from scipy import ndimage -from sklearn.utils.validation import check_array - - -def add_constant(data): - """ - Add a constant (i.e., intercept) term to `data`. 
- - Parameters - ---------- - data : (N, M) array_like - Samples by features data array - - Returns - ------- - data : (N, F) np.ndarray - Where `F` is `M + 1` - - Examples - -------- - >>> from netneurotools import utils - - >>> A = np.zeros((5, 5)) - >>> Ac = utils.add_constant(A) - >>> Ac - array([[0., 0., 0., 0., 0., 1.], - [0., 0., 0., 0., 0., 1.], - [0., 0., 0., 0., 0., 1.], - [0., 0., 0., 0., 0., 1.], - [0., 0., 0., 0., 0., 1.]]) - """ - data = check_array(data, ensure_2d=False) - return np.column_stack([data, np.ones(len(data))]) - - -def get_triu(data, k=1): - """ - Return vectorized version of upper triangle from `data`. - - Parameters - ---------- - data : (N, N) array_like - Input data - k : int, optional - Which diagonal to select from (where primary diagonal is 0). Default: 1 - - Returns - ------- - triu : (N * N-1 / 2) numpy.ndarray - Upper triangle of `data` - - Examples - -------- - >>> from netneurotools import utils - - >>> X = np.array([[1, 0.5, 0.25], [0.5, 1, 0.33], [0.25, 0.33, 1]]) - >>> tri = utils.get_triu(X) - >>> tri - array([0.5 , 0.25, 0.33]) - """ - return data[np.triu_indices(len(data), k=k)].copy() - - -def globpath(*args): - """ - Join `args` with :py:func:`os.path.join` and returns sorted glob output. - - Parameters - ---------- - args : str - Paths / `glob`-compatible regex strings - - Returns - ------- - files : list - Sorted list of files - """ - return sorted(glob.glob(os.path.join(*args))) - - -def rescale(data, low=0, high=1): - """ - Rescale `data` so it is within [`low`, `high`]. - - Parameters - ---------- - data : array_like - Input data array - low : float, optional - Lower bound for rescaling. Default: -1 - high : float, optional - Upper bound for rescaling. Default: 1 - - Returns - ------- - rescaled : np.ndarray - Rescaled data - """ - data = np.asarray(data) - rescaled = np.interp(data, (data.min(), data.max()), (low, high)) - - return rescaled - - -def run(cmd, env=None, return_proc=False, quiet=False): - """ - Run `cmd` via shell subprocess with provided environment `env`. - - Parameters - ---------- - cmd : str - Command to be run as single string - env : dict, optional - If provided, dictionary of key-value pairs to be added to base - environment when running `cmd`. Default: None - return_proc : bool, optional - Whether to return CompletedProcess object. Default: false - quiet : bool, optional - Whether to suppress stdout/stderr from subprocess. Default: False - - Returns - ------- - proc : subprocess.CompletedProcess - Process output - - Raises - ------ - subprocess.CalledProcessError - If subprocess does not exit cleanly - - Examples - -------- - >>> from netneurotools import utils - >>> p = utils.run('echo "hello world"', return_proc=True, quiet=True) - >>> p.returncode - 0 - >>> p.stdout # doctest: +SKIP - 'hello world\\n' - """ # noqa: D301 - merged_env = os.environ.copy() - if env is not None: - if not isinstance(env, dict): - raise TypeError('Provided `env` must be a dictionary, not {}' - .format(type(env))) - merged_env.update(env) - - opts = {} - if quiet: - opts = dict(stdout=subprocess.PIPE, stderr=subprocess.PIPE) - - proc = subprocess.run(cmd, env=merged_env, shell=True, check=True, - universal_newlines=True, **opts) - - if return_proc: - return proc - - -def check_fs_subjid(subject_id, subjects_dir=None): - """ - Check that `subject_id` exists in provided FreeSurfer `subjects_dir`. - - Parameters - ---------- - subject_id : str - FreeSurfer subject ID - subjects_dir : str, optional - Path to FreeSurfer subject directory. 
If not set, will inherit from - the environmental variable $SUBJECTS_DIR. Default: None - - Returns - ------- - subject_id : str - FreeSurfer subject ID, as provided - subjects_dir : str - Full filepath to `subjects_dir` - - Raises - ------ - FileNotFoundError - """ - # check inputs for subjects_dir and subject_id - if subjects_dir is None or not os.path.isdir(subjects_dir): - try: - subjects_dir = os.environ['SUBJECTS_DIR'] - except KeyError: - subjects_dir = os.getcwd() - else: - subjects_dir = os.path.abspath(subjects_dir) - - subjdir = os.path.join(subjects_dir, subject_id) - if not os.path.isdir(subjdir): - raise FileNotFoundError('Cannot find specified subject id {} in ' - 'provided subject directory {}.' - .format(subject_id, subjects_dir)) - - return subject_id, subjects_dir - - -def get_centroids(img, labels=None, image_space=False): - """ - Find centroids of `labels` in `img`. - - Parameters - ---------- - img : niimg-like object - 3D image containing integer label at each point - labels : array_like, optional - List of labels for which to find centroids. If not specified all - labels present in `img` will be used. Zero will be ignored as it is - considered "background." Default: None - image_space : bool, optional - Whether to return xyz (image space) coordinates for centroids based - on transformation in `img.affine`. Default: False - - Returns - ------- - centroids : (N, 3) np.ndarray - Coordinates of centroids for ROIs in input data - """ - from nilearn._utils import check_niimg_3d - - img = check_niimg_3d(img) - data = np.asarray(img.dataobj) - - if labels is None: - labels = np.trim_zeros(np.unique(data)) - - centroids = np.vstack(ndimage.center_of_mass(data, labels=data, - index=labels)) - - if image_space: - centroids = nib.affines.apply_affine(img.affine, centroids) - - return centroids diff --git a/pyproject.toml b/pyproject.toml index 9127969..9ef75fc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,10 +37,15 @@ doc = [ "sphinx_rtd_theme", "sphinx-gallery" ] -plotting = [ +pysurfer = [ + "vtk", "mayavi", "pysurfer" ] +pyvista = [ + "vtk", + "pyvista" +] numba = [ "numba" ] @@ -76,12 +81,7 @@ tag_prefix = "" parentdir_prefix = "" [tool.ruff] -select = ["E", "F", "B", "W", "D", "NPY"] -ignore = [ - "B905", # zip() without an explicit strict= parameter - # "W605", # Invalid escape sequence: latex - "NPY002", # Replace legacy `np.random` call with `np.random.Generator` -] + line-length = 88 exclude = [ "setup.py", @@ -92,18 +92,25 @@ exclude = [ ] target-version = "py38" -[tool.ruff.pydocstyle] +[tool.ruff.lint] +select = ["E", "F", "B", "W", "D", "NPY"] +ignore = [ + "B905", # zip() without an explicit strict= parameter + # "W605", # Invalid escape sequence: latex + "NPY002", # Replace legacy `np.random` call with `np.random.Generator` +] + +[tool.ruff.lint.pydocstyle] convention = "numpy" -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] "__init__.py" = ["D104", "F401"] -"netneurotools/tests/*" = ["B011", "D103"] +"test_*" = ["B011"] "examples/*" = ["E402", "D"] [tool.coverage.run] source = ["netneurotools"] omit = [ - "netneurotools/tests/*", "netneurotools/_version.py", ] From e8bb7e3889e866e3e9d41694d9ba297c8afbef6f Mon Sep 17 00:00:00 2001 From: Zhen-Qi Liu Date: Fri, 10 May 2024 15:46:09 -0400 Subject: [PATCH 02/32] Fix examples --- examples/plot_consensus_clustering.py | 4 ++-- examples/plot_perm_pvals.py | 13 ++++++------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/examples/plot_consensus_clustering.py 
b/examples/plot_consensus_clustering.py index 13faab1..69c337f 100644 --- a/examples/plot_consensus_clustering.py +++ b/examples/plot_consensus_clustering.py @@ -81,9 +81,9 @@ # We'll provide these different assignments to our consensus-finding algorithm # which will generate one final community assignment vector: -from netneurotools import cluster +from netneurotools import modularity -consensus = cluster.find_consensus(np.column_stack(ci), seed=1234) +consensus = modularity.find_consensus(np.column_stack(ci), seed=1234) plotting.plot_mod_heatmap(corr, consensus, cmap='viridis') ############################################################################### diff --git a/examples/plot_perm_pvals.py b/examples/plot_perm_pvals.py index 702d28a..6f87056 100644 --- a/examples/plot_perm_pvals.py +++ b/examples/plot_perm_pvals.py @@ -28,8 +28,8 @@ # We can use ``scipy.stats`` for a standard parametric test to assess whether # the array is different from zero: -from scipy import stats -print(stats.ttest_1samp(rvs, 0.0)) +import scipy.stats as sstats +print(sstats.ttest_1samp(rvs, 0.0)) ############################################################################### # And can do the same thing with permutations using ``netneurotools.stats``: @@ -88,7 +88,7 @@ # These two arrays shouldn't be meaningfully different, and we can test that # with a standard parametric test: -print(stats.ttest_rel(rvs1, rvs2)) +print(sstats.ttest_rel(rvs1, rvs2)) ############################################################################### # Or with a non-parametric permutation test: @@ -114,13 +114,12 @@ # # First, we'll generate two correlated variables: -from netneurotools import datasets -x, y = datasets.make_correlated_xy(corr=0.2, size=100) +x, y = nnstats.make_correlated_xy(corr=0.2, size=100) ############################################################################### # We can generate the Pearson correlation with the standard parametric p-value: -print(stats.pearsonr(x, y)) +print(sstats.pearsonr(x, y)) ############################################################################### # Or use permutation testing to derive the p-value: @@ -132,7 +131,7 @@ # :func:`~.permtest_rel` apply here, so you can provide same-sized arrays and # correlations will only be calculated for paired columns: -a, b = datasets.make_correlated_xy(corr=0.9, size=100) +a, b = nnstats.make_correlated_xy(corr=0.9, size=100) arr1, arr2 = np.column_stack([x, a]), np.column_stack([y, b]) print(nnstats.permtest_pearsonr(arr1, arr2)) From 8e320e759add4483ad6ff4a3b5163c1fe28a3129 Mon Sep 17 00:00:00 2001 From: Zhen-Qi Liu Date: Tue, 14 May 2024 17:40:41 -0400 Subject: [PATCH 03/32] Fix netneurotools.datasets --- netneurotools/datasets/_mirchi2018.py | 4 -- netneurotools/datasets/datasets.json | 9 +++++ netneurotools/datasets/datasets_utils.py | 4 +- netneurotools/datasets/fetch_atlas.py | 21 +++++----- netneurotools/datasets/fetch_project.py | 14 ++++--- netneurotools/datasets/fetch_template.py | 38 +++++++++++-------- ...atasetsutils.py => test_datasets_utils.py} | 0 netneurotools/datasets/tests/test_fetch.py | 9 +---- 8 files changed, 54 insertions(+), 45 deletions(-) rename netneurotools/datasets/tests/{test_datasetsutils.py => test_datasets_utils.py} (100%) diff --git a/netneurotools/datasets/_mirchi2018.py b/netneurotools/datasets/_mirchi2018.py index 3ef7e75..9addfea 100644 --- a/netneurotools/datasets/_mirchi2018.py +++ b/netneurotools/datasets/_mirchi2018.py @@ -5,7 +5,6 @@ import numpy as np - TIMESERIES = 
("https://s3.amazonaws.com/openneuro/ds000031/ds000031_R1.0.2" "/uncompressed/derivatives/sub-01/ses-{0}/" "sub-01_ses-{0}_task-rest_run-001_parcel-timeseries.txt") @@ -129,6 +128,3 @@ def _get_panas(data_dir=None, resume=True, verbose=1): measures[subscale] = measure.sum(axis=-1) return measures - - - diff --git a/netneurotools/datasets/datasets.json b/netneurotools/datasets/datasets.json index 80bb3e3..d68669c 100644 --- a/netneurotools/datasets/datasets.json +++ b/netneurotools/datasets/datasets.json @@ -401,5 +401,14 @@ "5ed80005fabc45000d639900" ], "md5": "67085e2577d21dc3a742f4fcde6e3b18" + }, + "tpl-hcp_standards": { + "standard_mesh_atlases": { + "url": [ + "mb37e", + "6643d2ab2eacc48a57097091" + ], + "md5": "806abac71f76b8dba8af467ef313c3f7" + } } } diff --git a/netneurotools/datasets/datasets_utils.py b/netneurotools/datasets/datasets_utils.py index f4bae92..42b4920 100644 --- a/netneurotools/datasets/datasets_utils.py +++ b/netneurotools/datasets/datasets_utils.py @@ -19,6 +19,7 @@ 'unknown', 'corpuscallosum', 'Background+FreeSurfer_Defined_Medial_Wall' ] + def _osfify_urls(data): """ Format `data` object with OSF API URL. @@ -167,7 +168,6 @@ def _get_freesurfer_subjid(subject_id, subjects_dir=None): subjects_dir : str Path to subject directory with `subject_id` """ - # check for FreeSurfer install w/fsaverage; otherwise, fetch required try: subject_id, subjects_dir = _check_freesurfer_subjid(subject_id, subjects_dir) @@ -182,4 +182,4 @@ def _get_freesurfer_subjid(subject_id, subjects_dir=None): subjects_dir = os.path.join(_get_data_dir(), 'tpl-fsaverage') subject_id, subjects_dir = _check_freesurfer_subjid(subject_id, subjects_dir) - return subject_id, subjects_dir \ No newline at end of file + return subject_id, subjects_dir diff --git a/netneurotools/datasets/fetch_atlas.py b/netneurotools/datasets/fetch_atlas.py index bc19e42..56571cc 100644 --- a/netneurotools/datasets/fetch_atlas.py +++ b/netneurotools/datasets/fetch_atlas.py @@ -5,9 +5,9 @@ try: # nilearn 0.10.3 - from nilearn.datasets._utils import fetch_files as _fetch_files + from nilearn.datasets._utils import fetch_files except ImportError: - from nilearn.datasets.utils import _fetch_files + from nilearn.datasets.utils import _fetch_files as fetch_files from sklearn.utils import Bunch @@ -16,6 +16,7 @@ _get_data_dir, _get_dataset_info ) + def fetch_cammoun2012(version='MNI152NLin2009aSym', data_dir=None, url=None, resume=True, verbose=1): """ @@ -128,7 +129,7 @@ def fetch_cammoun2012(version='MNI152NLin2009aSym', data_dir=None, url=None, files = [ (op.join(dataset_name, version, f), url, opts) for f in filenames ] - data = _fetch_files(data_dir, files=files, resume=resume, verbose=verbose) + data = fetch_files(data_dir, files=files, resume=resume, verbose=verbose) if version == 'MNI152NLin2009aSym': keys += ['info'] @@ -142,7 +143,6 @@ def fetch_cammoun2012(version='MNI152NLin2009aSym', data_dir=None, url=None, return Bunch(**dict(zip(keys, data))) - def fetch_schaefer2018(version='fsaverage', data_dir=None, url=None, resume=True, verbose=1): """ @@ -218,7 +218,7 @@ def fetch_schaefer2018(version='fsaverage', data_dir=None, url=None, files = [(op.join(dataset_name, version, f), url, opts) for f in filenames] - data = _fetch_files(data_dir, files=files, resume=resume, verbose=verbose) + data = fetch_files(data_dir, files=files, resume=resume, verbose=verbose) if suffix == 'annot': data = [SURFACE(*data[i:i + 2]) for i in range(0, len(keys) * 2, 2)] @@ -226,7 +226,6 @@ def fetch_schaefer2018(version='fsaverage', 
data_dir=None, url=None, return Bunch(**dict(zip(keys, data))) - def fetch_mmpall(version='fslr32k', data_dir=None, url=None, resume=True, verbose=1): """ @@ -292,12 +291,11 @@ def fetch_mmpall(version='fslr32k', data_dir=None, url=None, resume=True, ] files = [(op.join(dataset_name, version, f), url, opts) for f in filenames] - data = _fetch_files(data_dir, files=files, resume=resume, verbose=verbose) + data = fetch_files(data_dir, files=files, resume=resume, verbose=verbose) return SURFACE(*data) - def fetch_pauli2018(data_dir=None, url=None, resume=True, verbose=1): """ Download files for Pauli et al., 2018 subcortical parcellation. @@ -339,13 +337,13 @@ def fetch_pauli2018(data_dir=None, url=None, resume=True, verbose=1): data_dir = _get_data_dir(data_dir=data_dir) info = _get_dataset_info(dataset_name) - # format the query how _fetch_files() wants things and then download data + # format the query how fetch_files() wants things and then download data files = [ (i['name'], i['url'], dict(md5sum=i['md5'], move=i['name'])) for i in info ] - data = _fetch_files(data_dir, files=files, resume=resume, verbose=verbose) + data = fetch_files(data_dir, files=files, resume=resume, verbose=verbose) return Bunch(**dict(zip(keys, data))) @@ -355,7 +353,6 @@ def fetch_ye2020(): pass - def fetch_voneconomo(data_dir=None, url=None, resume=True, verbose=1): """ Fetch von-Economo Koskinas probabilistic FreeSurfer atlas. @@ -407,7 +404,7 @@ def fetch_voneconomo(data_dir=None, url=None, resume=True, verbose=1): for hemi in ['L', 'R'] for suff in ['gcs', 'ctab'] ] + ['atl-vonEconomoKoskinas_info.csv'] files = [(op.join(dataset_name, f), url, opts) for f in filenames] - data = _fetch_files(data_dir, files=files, resume=resume, verbose=verbose) + data = fetch_files(data_dir, files=files, resume=resume, verbose=verbose) data = [SURFACE(*data[:-1:2])] + [SURFACE(*data[1:-1:2])] + [data[-1]] return Bunch(**dict(zip(keys, data))) diff --git a/netneurotools/datasets/fetch_project.py b/netneurotools/datasets/fetch_project.py index c0cd94a..4b382d1 100644 --- a/netneurotools/datasets/fetch_project.py +++ b/netneurotools/datasets/fetch_project.py @@ -5,9 +5,9 @@ try: # nilearn 0.10.3 - from nilearn.datasets._utils import fetch_files as _fetch_files + from nilearn.datasets._utils import fetch_files except ImportError: - from nilearn.datasets.utils import _fetch_files + from nilearn.datasets.utils import _fetch_files as fetch_files from sklearn.utils import Bunch @@ -63,7 +63,7 @@ def fetch_vazquez_rodriguez2019(data_dir=None, url=None, resume=True, filenames = [ op.join(dataset_name, 'rsquared_gradient.csv') ] - data = _fetch_files(data_dir, files=[(f, url, opts) for f in filenames], + data = fetch_files(data_dir, files=[(f, url, opts) for f in filenames], resume=resume, verbose=verbose) # load data @@ -120,28 +120,32 @@ def fetch_hansen_manynetworks(): """Download files from Hansen et al., 2023, PLOS Biology.""" pass + def fetch_hansen_receptors(): """Download files from Hansen et al., 2022, Nature Neuroscience.""" pass + def fetch_hansen_genecognition(): """Download files from Hansen et al., 2021, Nature Human Behaviour.""" pass + def fetch_hansen_brainstem(): """Download files from Hansen et al., 2024.""" pass + def fetch_shafiei_hcpmeg(): """Download files from Shafiei et al., 2022 & Shafiei et al., 2023.""" pass + def fetch_suarez_mami(): """Download files from Suarez et al., 2022, eLife.""" pass - def fetch_famous_gmat(dataset, data_dir=None, url=None, resume=True, verbose=1): """ @@ -201,7 +205,7 @@ def 
fetch_famous_gmat(dataset, data_dir=None, url=None, resume=True, filenames = [ op.join(dataset, '{}.csv'.format(fn)) for fn in info['keys'] ] + [op.join(dataset, 'ref.txt')] - data = _fetch_files(data_dir, files=[(f, url, opts) for f in filenames], + data = fetch_files(data_dir, files=[(f, url, opts) for f in filenames], resume=resume, verbose=verbose) # load data diff --git a/netneurotools/datasets/fetch_template.py b/netneurotools/datasets/fetch_template.py index 9a88369..232a4c5 100644 --- a/netneurotools/datasets/fetch_template.py +++ b/netneurotools/datasets/fetch_template.py @@ -6,9 +6,9 @@ try: # nilearn 0.10.3 - from nilearn.datasets._utils import fetch_files as _fetch_files + from nilearn.datasets._utils import fetch_files except ImportError: - from nilearn.datasets.utils import _fetch_files + from nilearn.datasets.utils import _fetch_files as fetch_files from sklearn.utils import Bunch @@ -78,7 +78,7 @@ def fetch_fsaverage(version='fsaverage', data_dir=None, url=None, resume=True, data_dir = _check_freesurfer_subjid(version)[1] data = [op.join(data_dir, f) for f in filenames] except FileNotFoundError: - data = _fetch_files(data_dir, resume=resume, verbose=verbose, + data = fetch_files(data_dir, resume=resume, verbose=verbose, files=[(op.join(dataset_name, f), url, opts) for f in filenames]) @@ -87,8 +87,6 @@ def fetch_fsaverage(version='fsaverage', data_dir=None, url=None, resume=True, return Bunch(**dict(zip(keys, data))) - - def fetch_hcp_standards(data_dir=None, url=None, resume=True, verbose=1): """ Fetch HCP standard mesh atlases for converting between FreeSurfer and HCP. @@ -112,24 +110,34 @@ def fetch_hcp_standards(data_dir=None, url=None, resume=True, verbose=1): ------- standards : str Filepath to standard_mesh_atlases directory + + Notes + ----- + Original file from: http://brainvis.wustl.edu/workbench/standard_mesh_atlases.zip + Archived file from: https://web.archive.org/web/20220121035833/http://brainvis.wustl.edu/workbench/standard_mesh_atlases.zip """ - if url is None: - url = 'https://web.archive.org/web/20220121035833/' + \ - 'http://brainvis.wustl.edu/workbench/standard_mesh_atlases.zip' - dataset_name = 'standard_mesh_atlases' + dataset_name = 'tpl-hcp_standards' data_dir = _get_data_dir(data_dir=data_dir) + info = _get_dataset_info(dataset_name)["standard_mesh_atlases"] + if url is None: + url = info['url'] + opts = { 'uncompress': True, - 'move': '{}.zip'.format(dataset_name) + 'md5sum': info['md5'], + 'move': '{}.tar.gz'.format(dataset_name) } filenames = [ 'L.sphere.32k_fs_LR.surf.gii', 'R.sphere.32k_fs_LR.surf.gii' ] - files = [(op.join(dataset_name, f), url, opts) for f in filenames] - _fetch_files(data_dir, files=files, resume=resume, verbose=verbose) + files = [(op.join(dataset_name, "standard_mesh_atlases", f), url, opts) + for f in filenames] + + fetch_files(data_dir, files=files, resume=resume, verbose=verbose) return op.join(data_dir, dataset_name) + def fetch_civet(density='41k', version='v1', data_dir=None, url=None, resume=True, verbose=1): """ @@ -207,7 +215,7 @@ def fetch_civet(density='41k', version='v1', data_dir=None, url=None, for surf in keys for hemi in ['L', 'R'] ] - data = _fetch_files(data_dir, resume=resume, verbose=verbose, + data = fetch_files(data_dir, resume=resume, verbose=verbose, files=[(f, url, opts) for f in filenames]) data = [SURFACE(*data[i:i + 2]) for i in range(0, len(keys) * 2, 2)] @@ -273,7 +281,7 @@ def fetch_conte69(data_dir=None, url=None, resume=True, verbose=1): .format(res, hemi) for res in keys for hemi in ['L', 
'R'] ] + ['tpl-conte69/template_description.json'] - data = _fetch_files(data_dir, files=[(f, url, opts) for f in filenames], + data = fetch_files(data_dir, files=[(f, url, opts) for f in filenames], resume=resume, verbose=verbose) with open(data[-1], 'r') as src: @@ -344,7 +352,7 @@ def fetch_yerkes19(data_dir=None, url=None, resume=None, verbose=1): .format(res, hemi) for res in keys for hemi in ['L', 'R'] ] - data = _fetch_files(data_dir, files=[(f, url, opts) for f in filenames], + data = fetch_files(data_dir, files=[(f, url, opts) for f in filenames], resume=resume, verbose=verbose) # bundle hemispheres together diff --git a/netneurotools/datasets/tests/test_datasetsutils.py b/netneurotools/datasets/tests/test_datasets_utils.py similarity index 100% rename from netneurotools/datasets/tests/test_datasetsutils.py rename to netneurotools/datasets/tests/test_datasets_utils.py diff --git a/netneurotools/datasets/tests/test_fetch.py b/netneurotools/datasets/tests/test_fetch.py index adbf983..02a26c3 100644 --- a/netneurotools/datasets/tests/test_fetch.py +++ b/netneurotools/datasets/tests/test_fetch.py @@ -4,6 +4,7 @@ import numpy as np from netneurotools import datasets + @pytest.mark.parametrize('version', [ 'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', 'fsaverage6' ]) @@ -20,7 +21,7 @@ def test_fetch_fsaverage(tmpdir, version): def test_fetch_hcp_standards(tmpdir): """Test fetching of HCP standard meshes.""" - hcp = datasets.fetch_hcp_standards(data_dir=tmpdir, verbose=0) + hcp = datasets.fetch_hcp_standards(data_dir=tmpdir, verbose=1) assert os.path.isdir(hcp) @@ -51,7 +52,6 @@ def test_fetch_yerkes19(tmpdir): ['midthickness', 'inflated', 'vinflated']) - @pytest.mark.parametrize('version, expected', [ ('MNI152NLin2009aSym', [1, 1, 1, 1, 1]), ('fsaverage', [2, 2, 2, 2, 2]), @@ -121,7 +121,6 @@ def test_fetch_ye2020(tmpdir): pass - def test_fetch_voneconomo(tmpdir): """Test fetching of von Economo parcellations.""" vek = datasets.fetch_voneconomo(data_dir=tmpdir, verbose=0) @@ -154,7 +153,3 @@ def test_fetch_famous_gmat(tmpdir, dataset, expected): for key in expected: assert (key in connectome) assert isinstance(connectome[key], str if key == 'ref' else np.ndarray) - - - - From 84796220c8fcb4fd459b7aa1c399393eea7be878 Mon Sep 17 00:00:00 2001 From: Zhen-Qi Liu Date: Tue, 14 May 2024 17:41:17 -0400 Subject: [PATCH 04/32] Lots of fixes --- netneurotools/metrics/bct.py | 11 +-- netneurotools/metrics/metrics_utils.py | 3 +- netneurotools/metrics/spreading.py | 2 +- netneurotools/metrics/statistical.py | 1 + netneurotools/modularity/__init__.py | 2 +- netneurotools/modularity/modules.py | 3 +- .../modularity/tests/test_modules.py | 3 +- netneurotools/networks/consensus.py | 3 +- netneurotools/networks/networks_utils.py | 3 +- netneurotools/networks/randomize.py | 85 +++++++++---------- .../networks/tests/test_networks_utils.py | 5 +- netneurotools/plotting/__init__.py | 2 +- netneurotools/plotting/color_utils.py | 4 +- netneurotools/plotting/mpl_plotters.py | 4 +- netneurotools/plotting/pysurfer_plotters.py | 8 +- netneurotools/plotting/pyvista_plotters.py | 3 +- ...test_colorutils.py => test_color_utils.py} | 0 netneurotools/plotting/tests/test_mpl.py | 2 + netneurotools/plotting/tests/test_pysurfer.py | 3 +- netneurotools/spatial/spatial_stats.py | 3 +- netneurotools/stats/__init__.py | 1 - netneurotools/stats/correlation.py | 5 +- netneurotools/stats/permutation_test.py | 3 +- netneurotools/stats/regression.py | 5 +- netneurotools/stats/stats_utils.py | 2 - 
netneurotools/stats/tests/test_correlation.py | 1 + netneurotools/stats/tests/test_permutation.py | 2 +- netneurotools/stats/tests/test_regression.py | 2 +- pyproject.toml | 2 + 29 files changed, 84 insertions(+), 89 deletions(-) rename netneurotools/plotting/tests/{test_colorutils.py => test_color_utils.py} (100%) diff --git a/netneurotools/metrics/bct.py b/netneurotools/metrics/bct.py index c98df74..3996350 100644 --- a/netneurotools/metrics/bct.py +++ b/netneurotools/metrics/bct.py @@ -19,6 +19,7 @@ from .metrics_utils import _fast_binarize + def degrees_und(W): """ Compute the degree of each node in `W`. @@ -64,8 +65,6 @@ def degrees_dir(W): return deg_in, deg_out, deg - - def distance_wei_floyd(D): """ Compute the all-pairs shortest path length using Floyd-Warshall algorithm. @@ -146,7 +145,6 @@ def retrieve_shortest_path(s, t, p_mat): retrieve_shortest_path = njit(retrieve_shortest_path) - def navigation_wu(nav_dist_mat, sc_mat): """ Compute network navigation. @@ -286,7 +284,6 @@ def get_navigation_path_length(nav_paths, alt_dist_mat): return nav_path_len - def communicability_bin(adjacency, normalize=False): """ Compute the communicability of pairs of nodes in `adjacency`. @@ -378,7 +375,6 @@ def communicability_wei(adjacency): return cmc - def path_transitivity(D): """ Calculate path transitivity. @@ -541,7 +537,6 @@ def search_information(W, D, has_memory=False): return SI - def mean_first_passage_time(W, tol=1e-3): """ Calculate mean first passage time. @@ -636,8 +631,6 @@ def diffusion_efficiency(W): return GE_diff, E_diff - - def resource_efficiency_bin(W_bin, lambda_prob=0.5): """ Calculate resource efficiency and shortest-path probability. @@ -776,13 +769,11 @@ def flow_graph(W, r=None, t=1): return dyn - def assortativity(W, r=None): """Calculate assortativity.""" pass - def matching_ind_und(W): """ Calculate undirected matching index. diff --git a/netneurotools/metrics/metrics_utils.py b/netneurotools/metrics/metrics_utils.py index 908d66a..b32f632 100644 --- a/netneurotools/metrics/metrics_utils.py +++ b/netneurotools/metrics/metrics_utils.py @@ -8,6 +8,7 @@ except ImportError: use_numba = False + def _fast_binarize(W): """ Binarize a matrix. @@ -62,4 +63,4 @@ def _graph_laplacian(W): if use_numba: - _graph_laplacian = njit(_graph_laplacian) # ("float64[:,::1](float64[:,::1])") \ No newline at end of file + _graph_laplacian = njit(_graph_laplacian) # ("float64[:,::1](float64[:,::1])") diff --git a/netneurotools/metrics/spreading.py b/netneurotools/metrics/spreading.py index b91f7bf..8a5fb79 100644 --- a/netneurotools/metrics/spreading.py +++ b/netneurotools/metrics/spreading.py @@ -2,5 +2,5 @@ def simulate_atrophy(): - """Function to simulate atrophy in a network.""" + """Simulate atrophy in a network.""" pass diff --git a/netneurotools/metrics/statistical.py b/netneurotools/metrics/statistical.py index ea5e945..db1bed8 100644 --- a/netneurotools/metrics/statistical.py +++ b/netneurotools/metrics/statistical.py @@ -10,6 +10,7 @@ from .metrics_utils import _graph_laplacian + def network_pearsonr(annot1, annot2, weight): r""" Calculate pearson correlation between two annotation vectors. 
diff --git a/netneurotools/modularity/__init__.py b/netneurotools/modularity/__init__.py index 4f320cb..2fe84ad 100644 --- a/netneurotools/modularity/__init__.py +++ b/netneurotools/modularity/__init__.py @@ -22,4 +22,4 @@ 'find_consensus', 'consensus_modularity', '_dummyvar', 'zrand', '_zrand_partitions', 'get_modularity', 'get_modularity_z', 'get_modularity_sig', -] \ No newline at end of file +] diff --git a/netneurotools/modularity/modules.py b/netneurotools/modularity/modules.py index 4eef241..a1b42fd 100644 --- a/netneurotools/modularity/modules.py +++ b/netneurotools/modularity/modules.py @@ -14,7 +14,6 @@ use_numba = False - def _get_relabels(c1, c2): """ Find mapping of labels from `c1` to `c2`. @@ -670,4 +669,4 @@ def get_modularity_sig(adjacency, comm, gamma=1, n_perm=10000, alpha=0.01, q_sig = real_qs > np.percentile(simu_qs, 100 * (1 - alpha), axis=1) - return q_sig \ No newline at end of file + return q_sig diff --git a/netneurotools/modularity/tests/test_modules.py b/netneurotools/modularity/tests/test_modules.py index 12b82cd..64248b9 100644 --- a/netneurotools/modularity/tests/test_modules.py +++ b/netneurotools/modularity/tests/test_modules.py @@ -9,6 +9,7 @@ rs = np.random.RandomState(1234) + @pytest.mark.parametrize('c1, c2, out', [ # uniform communities (np.array([0, 0, 0, 0, 0, 0, 0, 0, 0]), @@ -130,4 +131,4 @@ def test_zrand_partitions(): # partition of labels that are all the same should have higher average # zrand and lower stdev zrand assert np.nanmean(all_same) > np.nanmean(all_diff) - assert np.nanstd(all_same) < np.nanstd(all_diff) \ No newline at end of file + assert np.nanstd(all_same) < np.nanstd(all_diff) diff --git a/netneurotools/networks/consensus.py b/netneurotools/networks/consensus.py index b091110..cc48baf 100644 --- a/netneurotools/networks/consensus.py +++ b/netneurotools/networks/consensus.py @@ -5,6 +5,7 @@ check_random_state, check_array, check_consistent_length ) + def func_consensus(data, n_boot=1000, ci=95, seed=None): """ Calculate thresholded group consensus functional connectivity graph. @@ -291,5 +292,3 @@ def struct_consensus(data, distance, hemiid, if weighted: consensus = consensus * np.mean(data, axis=2) return consensus - - diff --git a/netneurotools/networks/networks_utils.py b/netneurotools/networks/networks_utils.py index bbf401f..98925b5 100644 --- a/netneurotools/networks/networks_utils.py +++ b/netneurotools/networks/networks_utils.py @@ -3,6 +3,7 @@ import numpy as np from scipy.sparse import csgraph + def get_triu(data, k=1): """ Return vectorized version of upper triangle from `data`. 
@@ -124,4 +125,4 @@ def threshold_network(network, retain=10): mst[inds] = graph[inds] mst = np.array((mst + mst.T) != 0, dtype=int) - return mst \ No newline at end of file + return mst diff --git a/netneurotools/networks/randomize.py b/netneurotools/networks/randomize.py index 44227d0..a91f9ec 100644 --- a/netneurotools/networks/randomize.py +++ b/netneurotools/networks/randomize.py @@ -4,7 +4,7 @@ import numpy as np from tqdm import tqdm from sklearn.utils.validation import ( - check_random_state, check_array, check_consistent_length + check_random_state ) try: @@ -96,6 +96,7 @@ def randmio_und(W, itr): return W, eff + if use_numba: randmio_und = njit(randmio_und) @@ -245,7 +246,7 @@ def match_length_degree_distribution(W, D, nbins=10, nswap=1000, if len(np.where(B != 0)[0]) != len(np.where(newB != 0)[0]): print( f"ERROR --- number of edges changed, \ - B:{len(np.where(B!=0)[0])}, newB:{len(np.where(newB!=0)[0])}") + B:{len(np.where(B != 0)[0])}, newB:{len(np.where(newB != 0)[0])}") # check that the degree of the nodes it's the same for i in range(N): if np.sum(B[i]) != np.sum(newB[i]): @@ -277,8 +278,6 @@ def match_length_degree_distribution(W, D, nbins=10, nswap=1000, return newB, newW, nr - - def strength_preserving_rand_sa(A, rewiring_iter=10, nstage=100, niter=10000, temp=1000, frac=0.5, @@ -376,11 +375,11 @@ def strength_preserving_rand_sa(A, rewiring_iter=10, rs = check_random_state(seed) n = A.shape[0] - s = np.sum(A, axis=1) #strengths of A + s = np.sum(A, axis=1) # strengths of A - #Maslov & Sneppen rewiring + # Maslov & Sneppen rewiring if R is None: - #ensuring connectedness if the original network is connected + # ensuring connectedness if the original network is connected if connected is None: connected = False if bct.number_of_components(A) > 1 else True if connected: @@ -390,10 +389,10 @@ def strength_preserving_rand_sa(A, rewiring_iter=10, else: B = R.copy() - u, v = np.triu(B, k=1).nonzero() #upper triangle indices - wts = np.triu(B, k=1)[(u, v)] #upper triangle values + u, v = np.triu(B, k=1).nonzero() # upper triangle indices + wts = np.triu(B, k=1)[(u, v)] # upper triangle values m = len(wts) - sb = np.sum(B, axis=1) #strengths of B + sb = np.sum(B, axis=1) # strengths of B if energy_func is not None: energy = energy_func(s, sb) @@ -423,7 +422,7 @@ def strength_preserving_rand_sa(A, rewiring_iter=10, naccept = 0 for _ in range(niter): - #permutation + # permutation e1 = rs.randint(m) e2 = rs.randint(m) @@ -452,9 +451,9 @@ def strength_preserving_rand_sa(A, rewiring_iter=10, "Received: {}.".format(energy_type)) raise ValueError(msg) - #permutation acceptance criterion + # permutation acceptance criterion if (energy_prime < energy or - rs.rand() < np.exp(-(energy_prime - energy)/temp)): + rs.rand() < np.exp(-(energy_prime - energy) / temp)): sb = sb_prime.copy() wts[[e1, e2]] = wts[[e2, e1]] energy = energy_prime @@ -463,13 +462,13 @@ def strength_preserving_rand_sa(A, rewiring_iter=10, wtsmin = wts.copy() naccept = naccept + 1 - #temperature update - temp = temp*frac + # temperature update + temp = temp * frac if verbose: print('\nstage {:d}, temp {:.5f}, best energy {:.5f}, ' 'frac of accepted moves {:.3f}'.format(istage, temp, energymin, - naccept/niter)) + naccept / niter)) B = np.zeros((n, n)) B[(u, v)] = wtsmin @@ -561,11 +560,11 @@ def strength_preserving_rand_sa_mse_opt(A, rewiring_iter=10, rs = check_random_state(seed) n = A.shape[0] - s = np.sum(A, axis=1) #strengths of A + s = np.sum(A, axis=1) # strengths of A - #Maslov & Sneppen rewiring + # Maslov & 
Sneppen rewiring if R is None: - #ensuring connectedness if the original network is connected + # ensuring connectedness if the original network is connected if connected is None: connected = False if bct.number_of_components(A) > 1 else True if connected: @@ -575,10 +574,10 @@ def strength_preserving_rand_sa_mse_opt(A, rewiring_iter=10, else: B = R.copy() - u, v = np.triu(B, k=1).nonzero() #upper triangle indices - wts = np.triu(B, k=1)[(u, v)] #upper triangle values + u, v = np.triu(B, k=1).nonzero() # upper triangle indices + wts = np.triu(B, k=1)[(u, v)] # upper triangle values m = len(wts) - sb = np.sum(B, axis=1) #strengths of B + sb = np.sum(B, axis=1) # strengths of B energy = np.mean((s - sb)**2) @@ -594,7 +593,7 @@ def strength_preserving_rand_sa_mse_opt(A, rewiring_iter=10, rs.rand(niter) ): - #permutation + # permutation a, b, c, d = u[e1], v[e1], u[e2], v[e2] wts_change = wts[e1] - wts[e2] delta_energy = (2 * wts_change * @@ -604,10 +603,10 @@ def strength_preserving_rand_sa_mse_opt(A, rewiring_iter=10, (s[c] - sb[c]) - (s[d] - sb[d]) ) - )/n + ) / n - #permutation acceptance criterion - if (delta_energy < 0 or prob < np.e**(-(delta_energy)/temp)): + # permutation acceptance criterion + if (delta_energy < 0 or prob < np.e**(-(delta_energy) / temp)): sb[[a, b]] -= wts_change sb[[c, d]] += wts_change @@ -620,13 +619,13 @@ def strength_preserving_rand_sa_mse_opt(A, rewiring_iter=10, wtsmin = wts.copy() naccept = naccept + 1 - #temperature update - temp = temp*frac + # temperature update + temp = temp * frac if verbose: print('\nstage {:d}, temp {:.5f}, best energy {:.5f}, ' 'frac of accepted moves {:.3f}'.format(istage, temp, energymin, - naccept/niter)) + naccept / niter)) B = np.zeros((n, n)) B[(u, v)] = wtsmin @@ -728,20 +727,20 @@ def strength_preserving_rand_sa_dir(A, rewiring_iter=10, rs = check_random_state(seed) n = A.shape[0] - s_in = np.sum(A, axis=0) #in-strengths of A - s_out = np.sum(A, axis=1) #out-strengths of A + s_in = np.sum(A, axis=0) # in-strengths of A + s_out = np.sum(A, axis=1) # out-strengths of A - #Maslov & Sneppen rewiring + # Maslov & Sneppen rewiring if connected: B = bct.randmio_dir_connected(A, rewiring_iter, seed=seed)[0] else: B = bct.randmio_dir(A, rewiring_iter, seed=seed)[0] - u, v = B.nonzero() #nonzero indices of B - wts = B[(u, v)] #nonzero values of B + u, v = B.nonzero() # nonzero indices of B + wts = B[(u, v)] # nonzero values of B m = len(wts) - sb_in = np.sum(B, axis=0) #in-strengths of B - sb_out = np.sum(B, axis=1) #out-strengths of B + sb_in = np.sum(B, axis=0) # in-strengths of B + sb_out = np.sum(B, axis=1) # out-strengths of B if energy_func is not None: energy = energy_func(s_in, sb_in) + energy_func(s_out, sb_out) @@ -750,7 +749,7 @@ def strength_preserving_rand_sa_dir(A, rewiring_iter=10, elif energy_type == 'max': energy = np.max(np.abs(s_in - sb_in)) + np.max(np.abs(s_out - sb_out)) elif energy_type == 'mae': - energy= np.mean(np.abs(s_in - sb_in)) + np.mean(np.abs(s_out - sb_out)) + energy = np.mean(np.abs(s_in - sb_in)) + np.mean(np.abs(s_out - sb_out)) elif energy_type == 'mse': energy = np.mean((s_in - sb_in)**2) + np.mean((s_out - sb_out)**2) elif energy_type == 'rmse': @@ -772,7 +771,7 @@ def strength_preserving_rand_sa_dir(A, rewiring_iter=10, naccept = 0 for _ in range(niter): - #permutation + # permutation e1 = rs.randint(m) e2 = rs.randint(m) @@ -810,9 +809,9 @@ def strength_preserving_rand_sa_dir(A, rewiring_iter=10, "Received: {}.".format(energy_type)) raise ValueError(msg) - #permutation acceptance criterion + # 
permutation acceptance criterion if (energy_prime < energy or - rs.rand() < np.exp(-(energy_prime - energy)/temp)): + rs.rand() < np.exp(-(energy_prime - energy) / temp)): sb_in = sb_prime_in.copy() sb_out = sb_prime_out.copy() wts[[e1, e2]] = wts[[e2, e1]] @@ -822,13 +821,13 @@ def strength_preserving_rand_sa_dir(A, rewiring_iter=10, wtsmin = wts.copy() naccept = naccept + 1 - #temperature update - temp = temp*frac + # temperature update + temp = temp * frac if verbose: print('\nstage {:d}, temp {:.5f}, best energy {:.5f}, ' 'frac of accepted moves {:.3f}'.format(istage, temp, energymin, - naccept/niter)) + naccept / niter)) B = np.zeros((n, n)) B[(u, v)] = wtsmin diff --git a/netneurotools/networks/tests/test_networks_utils.py b/netneurotools/networks/tests/test_networks_utils.py index cabd085..7bbcfef 100644 --- a/netneurotools/networks/tests/test_networks_utils.py +++ b/netneurotools/networks/tests/test_networks_utils.py @@ -1,11 +1,12 @@ """For testing netneurotools.networks.networks_utils functionality.""" -import pytest import numpy as np from netneurotools import networks + def test_get_triu(): + """Test that get_triu returns correct values.""" arr = np.arange(9).reshape(3, 3) assert np.all(networks.get_triu(arr) == np.array([1, 2, 5])) - assert np.all(networks.get_triu(arr, k=0) == np.array([0, 1, 2, 4, 5, 8])) \ No newline at end of file + assert np.all(networks.get_triu(arr, k=0) == np.array([0, 1, 2, 4, 5, 8])) diff --git a/netneurotools/plotting/__init__.py b/netneurotools/plotting/__init__.py index a55f49c..2124fe3 100644 --- a/netneurotools/plotting/__init__.py +++ b/netneurotools/plotting/__init__.py @@ -28,7 +28,7 @@ 'pv_plot_surface', # mpl_plotters '_grid_communities', '_sort_communities', - 'plot_point_brain', 'plot_mod_heatmap' , + 'plot_point_brain', 'plot_mod_heatmap', # color_utils 'available_cmaps' ] diff --git a/netneurotools/plotting/color_utils.py b/netneurotools/plotting/color_utils.py index 692baf8..e3eefbe 100644 --- a/netneurotools/plotting/color_utils.py +++ b/netneurotools/plotting/color_utils.py @@ -1,5 +1,6 @@ """Functions for working with colors and colormaps.""" +import matplotlib from matplotlib.colors import LinearSegmentedColormap, ListedColormap __all__ = ['parula', 'justine', 'dinosaur'] @@ -93,10 +94,9 @@ def available_cmaps(): def _register_cmaps(): """Register all colormaps in module so they are accessible via matplotlib.""" - from matplotlib.cm import register_cmap for cmap in __all__: - register_cmap(cmap, globals()[cmap]) + matplotlib.colormaps.register(globals()[cmap], name=cmap) _register_cmaps() diff --git a/netneurotools/plotting/mpl_plotters.py b/netneurotools/plotting/mpl_plotters.py index 2f79b94..66dcd9b 100644 --- a/netneurotools/plotting/mpl_plotters.py +++ b/netneurotools/plotting/mpl_plotters.py @@ -124,7 +124,7 @@ def plot_mod_heatmap(data, communities, *, inds=None, edgecolor='black', inds = _sort_communities(data, communities) if ax is None: - fig, ax = plt.subplots(1, 1, figsize=figsize) + _, ax = plt.subplots(1, 1, figsize=figsize) # plot data re-ordered based on community and node strength if mask_diagonal: @@ -199,8 +199,6 @@ def plot_mod_heatmap(data, communities, *, inds=None, edgecolor='black', return ax - - def plot_point_brain(data, coords, views=None, views_orientation='vertical', views_size=(4, 2.4), cbar=False, robust=True, size=50, **kwargs): diff --git a/netneurotools/plotting/pysurfer_plotters.py b/netneurotools/plotting/pysurfer_plotters.py index 4dd88b6..50e830b 100644 --- 
a/netneurotools/plotting/pysurfer_plotters.py +++ b/netneurotools/plotting/pysurfer_plotters.py @@ -6,6 +6,7 @@ from ..datasets import FREESURFER_IGNORE, _get_freesurfer_subjid + def plot_conte69(data, lhlabel, rhlabel, surf='midthickness', vmin=None, vmax=None, colormap='viridis', colorbar=True, num_labels=4, orientation='horizontal', @@ -188,9 +189,6 @@ def plot_fslr(data, lhlabel, rhlabel, surf_atlas='conte69', return lhplot, rhplot - - - def plot_fsaverage(data, *, lhannot, rhannot, order='lr', mask=None, noplot=None, subject_id='fsaverage', subjects_dir=None, vmin=None, vmax=None, **kwargs): @@ -302,7 +300,7 @@ def _decode_list(vals): # loads annotation data for hemisphere, including vertex `labels`! if not annot.startswith(os.path.abspath(os.sep)): annot = os.path.join(subjects_dir, subject_id, 'label', annot) - labels, ctab, names = nib.freesurfer.read_annot(annot) + labels, _, names = nib.freesurfer.read_annot(annot) names = _decode_list(names) # get appropriate data, accounting for hemispheric asymmetry @@ -480,4 +478,4 @@ def plot_fsvertex(data, *, order='lr', surf='pial', views='lat', cm.scalar_bar.number_of_labels = num_labels surf[n].render() - return brain \ No newline at end of file + return brain diff --git a/netneurotools/plotting/pyvista_plotters.py b/netneurotools/plotting/pyvista_plotters.py index fc98223..7b17565 100644 --- a/netneurotools/plotting/pyvista_plotters.py +++ b/netneurotools/plotting/pyvista_plotters.py @@ -1,5 +1,6 @@ """Functions for pyvista-based plotting.""" + def pv_plot_surface(): """Plot a surface using PyVista.""" - pass \ No newline at end of file + pass diff --git a/netneurotools/plotting/tests/test_colorutils.py b/netneurotools/plotting/tests/test_color_utils.py similarity index 100% rename from netneurotools/plotting/tests/test_colorutils.py rename to netneurotools/plotting/tests/test_color_utils.py diff --git a/netneurotools/plotting/tests/test_mpl.py b/netneurotools/plotting/tests/test_mpl.py index 71141a2..1d7d79a 100644 --- a/netneurotools/plotting/tests/test_mpl.py +++ b/netneurotools/plotting/tests/test_mpl.py @@ -4,6 +4,7 @@ import matplotlib.pyplot as plt from netneurotools import plotting + def test_grid_communities(): """Test _grid_communities function.""" comms = np.asarray([0, 0, 0, 0, 1, 1, 1, 1, 2, 2]) @@ -28,6 +29,7 @@ def test_plot_mod_heatmap(): ax = plotting.plot_mod_heatmap(data, comms) assert isinstance(ax, plt.Axes) + def test_plot_point_brain(): """Test plot_point_brain function.""" data = np.random.rand(100) diff --git a/netneurotools/plotting/tests/test_pysurfer.py b/netneurotools/plotting/tests/test_pysurfer.py index 3edb943..3133b3c 100644 --- a/netneurotools/plotting/tests/test_pysurfer.py +++ b/netneurotools/plotting/tests/test_pysurfer.py @@ -4,6 +4,7 @@ import numpy as np from netneurotools import datasets, plotting + @pytest.mark.filterwarnings('ignore') def test_plot_fsvertex(): """Test plotting on a freesurfer vertex.""" @@ -24,4 +25,4 @@ def test_plot_fsaverage(): lhannot, rhannot = datasets.fetch_cammoun2012('fsaverage5')['scale033'] brain = plotting.plot_fsaverage(data, lhannot=lhannot, rhannot=rhannot, subject_id='fsaverage5', offscreen=True) - assert isinstance(brain, surfer.Brain) \ No newline at end of file + assert isinstance(brain, surfer.Brain) diff --git a/netneurotools/spatial/spatial_stats.py b/netneurotools/spatial/spatial_stats.py index 1552b04..54baddf 100644 --- a/netneurotools/spatial/spatial_stats.py +++ b/netneurotools/spatial/spatial_stats.py @@ -5,6 +5,7 @@ def morans_i(): """Calculate 
Moran's I for spatial autocorrelation.""" pass + def local_morans_i(): """Calculate local Moran's I for spatial autocorrelation.""" - pass \ No newline at end of file + pass diff --git a/netneurotools/stats/__init__.py b/netneurotools/stats/__init__.py index c1fc422..e000548 100644 --- a/netneurotools/stats/__init__.py +++ b/netneurotools/stats/__init__.py @@ -34,4 +34,3 @@ '_add_constant', 'residualize', 'get_dominance_stats', # stats_utils ] - diff --git a/netneurotools/stats/correlation.py b/netneurotools/stats/correlation.py index 1a2773b..71bb79e 100644 --- a/netneurotools/stats/correlation.py +++ b/netneurotools/stats/correlation.py @@ -10,6 +10,7 @@ except ImportError: # scipy < 1.8.0 from scipy.stats.stats import _chk2_asarray + def efficient_pearsonr(a, b, ddof=1, nan_policy='propagate'): """ Compute correlation of matching columns in `a` and `b`. @@ -54,7 +55,7 @@ def efficient_pearsonr(a, b, ddof=1, nan_policy='propagate'): >>> stats.efficient_pearsonr(np.c_[x1, x2], np.c_[y1, y2]) (array([0.10032565, 0.79961189]), array([3.20636135e-01, 1.97429944e-23])) """ - a, b, axis = _chk2_asarray(a, b, 0) + a, b, _ = _chk2_asarray(a, b, 0) if len(a) != len(b): raise ValueError('Provided arrays do not have same length') @@ -91,7 +92,7 @@ def efficient_pearsonr(a, b, ddof=1, nan_policy='propagate'): # taken from scipy.stats ab = (n_obs / 2) - 1 - prob = 2 * sspecial.btdtr(ab, ab, 0.5 * (1 - np.abs(corr))) + prob = 2 * sspecial.betainc(ab, ab, 0.5 * (1 - np.abs(corr))) return corr, prob diff --git a/netneurotools/stats/permutation_test.py b/netneurotools/stats/permutation_test.py index 9cb5d3c..bcd11ac 100644 --- a/netneurotools/stats/permutation_test.py +++ b/netneurotools/stats/permutation_test.py @@ -10,6 +10,7 @@ from .correlation import efficient_pearsonr + def permtest_1samp(a, popmean, axis=0, n_perm=1000, seed=0): """ Non-parametric equivalent of :py:func:`scipy.stats.ttest_1samp`. @@ -279,4 +280,4 @@ def permtest_pearsonr(a, b, axis=0, n_perm=1000, resamples=None, seed=0): pvals = permutations / (n_perm + 1) # + 1 in denom accounts for true_corr - return true_corr, pvals \ No newline at end of file + return true_corr, pvals diff --git a/netneurotools/stats/regression.py b/netneurotools/stats/regression.py index 13037af..b6b4c2c 100644 --- a/netneurotools/stats/regression.py +++ b/netneurotools/stats/regression.py @@ -9,6 +9,7 @@ from sklearn.linear_model import LinearRegression from sklearn.utils.validation import check_array + def _add_constant(data): """ Add a constant (i.e., intercept) term to `data`. @@ -87,7 +88,7 @@ def residualize(X, Y, Xc=None, Yc=None, normalize=True, add_intercept=True): # add intercept to regressors if requested and calculate fit if add_intercept: X, Xc = _add_constant(X), _add_constant(Xc) - betas, *rest = np.linalg.lstsq(Xc, Yc, rcond=None) + betas, *_ = np.linalg.lstsq(Xc, Yc, rcond=None) # remove intercept from regressors and betas for calculation of residuals if add_intercept: @@ -104,8 +105,6 @@ def residualize(X, Y, Xc=None, Yc=None, normalize=True, add_intercept=True): return Yr - - def get_dominance_stats(X, y, use_adjusted_r_sq=True, verbose=False, n_jobs=1): """ Return the dominance analysis statistics for multilinear regression. 
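[Editor's note, not part of the patch] A minimal sketch of what the residualization step touched in the regression.py hunk above is expected to do after this refactor. numpy and np.linalg.lstsq are standard; the surrounding netneurotools.stats API (residualize, _add_constant) is assumed from the diff rather than verified against a released package, so treat this as illustrative only:

    import numpy as np

    rng = np.random.default_rng(1234)
    X = rng.standard_normal((100, 2))                 # confound regressors
    Y = X @ np.array([[0.5], [-0.3]]) + 0.1 * rng.standard_normal((100, 1))

    # mirror the core of residualize(): append an intercept column, fit OLS,
    # then drop the intercept term before computing residuals of Y given X
    Xc = np.column_stack([X, np.ones(len(X))])        # analogous to _add_constant(X)
    betas, *_ = np.linalg.lstsq(Xc, Y, rcond=None)    # same lstsq call as in the diff
    Yr = Y - X @ betas[:-1]                           # residualized Y, intercept removed
    print(Yr.shape)                                   # -> (100, 1)

The `betas, *_` unpacking shown in the diff keeps only the coefficient array and discards the residual-sum, rank, and singular-value outputs of lstsq, which is why the sketch above can drop the intercept row with a simple slice.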
diff --git a/netneurotools/stats/stats_utils.py b/netneurotools/stats/stats_utils.py index 2d16837..9964f8e 100644 --- a/netneurotools/stats/stats_utils.py +++ b/netneurotools/stats/stats_utils.py @@ -1,3 +1 @@ """Functions for supporting statistics.""" - - diff --git a/netneurotools/stats/tests/test_correlation.py b/netneurotools/stats/tests/test_correlation.py index 725bcae..f5c8653 100644 --- a/netneurotools/stats/tests/test_correlation.py +++ b/netneurotools/stats/tests/test_correlation.py @@ -4,6 +4,7 @@ import numpy as np from netneurotools import stats + @pytest.mark.parametrize('x, y, expected', [ # basic one-dimensional input (range(5), range(5), (1.0, 0.0)), diff --git a/netneurotools/stats/tests/test_permutation.py b/netneurotools/stats/tests/test_permutation.py index 71411b0..ae6a335 100644 --- a/netneurotools/stats/tests/test_permutation.py +++ b/netneurotools/stats/tests/test_permutation.py @@ -4,6 +4,7 @@ import numpy as np from netneurotools import stats + @pytest.mark.xfail def test_permtest_1samp(): """Test permutation test for one-sample t-test.""" @@ -62,4 +63,3 @@ def test_permtest_pearsonr(): np.column_stack([y, b])) assert np.allclose(r, np.array([0.50004037, 0.89927523])) assert np.allclose(p, np.array([0.000999, 0.000999])) - diff --git a/netneurotools/stats/tests/test_regression.py b/netneurotools/stats/tests/test_regression.py index bc8b7ed..ce20638 100644 --- a/netneurotools/stats/tests/test_regression.py +++ b/netneurotools/stats/tests/test_regression.py @@ -11,4 +11,4 @@ def test_add_constant(): # if provided a 2D array it will return the same, extended by 1 column out = stats._add_constant(np.random.rand(100, 100)) - assert out.shape == (100, 101) and np.all(out[:, -1] == 1) \ No newline at end of file + assert out.shape == (100, 101) and np.all(out[:, -1] == 1) diff --git a/pyproject.toml b/pyproject.toml index 9ef75fc..8f9aaba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -99,6 +99,8 @@ ignore = [ # "W605", # Invalid escape sequence: latex "NPY002", # Replace legacy `np.random` call with `np.random.Generator` ] +extend-select = ["E302"] +preview = true [tool.ruff.lint.pydocstyle] convention = "numpy" From d55443c4141a977dece8e52cbea9d110eef01c73 Mon Sep 17 00:00:00 2001 From: Zhen-Qi Liu Date: Tue, 14 May 2024 17:44:38 -0400 Subject: [PATCH 05/32] Fix style --- .github/workflows/tests.yml | 2 +- netneurotools/plotting/color_utils.py | 1 - resources/generate_atl-cammoun2012_surface.py | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index e4e5487..953b0fd 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -24,7 +24,7 @@ jobs: python -m pip install --upgrade pip python -m pip install ruff - name: Run style checks - run: ruff . + run: ruff check . 
run_tests: needs: check_style diff --git a/netneurotools/plotting/color_utils.py b/netneurotools/plotting/color_utils.py index e3eefbe..f9fb3d0 100644 --- a/netneurotools/plotting/color_utils.py +++ b/netneurotools/plotting/color_utils.py @@ -94,7 +94,6 @@ def available_cmaps(): def _register_cmaps(): """Register all colormaps in module so they are accessible via matplotlib.""" - for cmap in __all__: matplotlib.colormaps.register(globals()[cmap], name=cmap) diff --git a/resources/generate_atl-cammoun2012_surface.py b/resources/generate_atl-cammoun2012_surface.py index 686b518..6edc121 100755 --- a/resources/generate_atl-cammoun2012_surface.py +++ b/resources/generate_atl-cammoun2012_surface.py @@ -116,7 +116,7 @@ def combine_cammoun_500(lhannot, rhannot, subject_id, annot=None, quiet=quiet) # save ctab information from annotation file - vtx, ct, names = nib.freesurfer.read_annot(fn) + _, ct, names = nib.freesurfer.read_annot(fn) data = np.column_stack([[f.decode() for f in names], ct[:, :-1]]) ctab = ctab.append(pd.DataFrame(data), ignore_index=True) From 148aee8b7867b5a10054658bfc9276837556dc29 Mon Sep 17 00:00:00 2001 From: Zhen-Qi Liu Date: Tue, 14 May 2024 17:56:01 -0400 Subject: [PATCH 06/32] Fix package data --- MANIFEST.in | 3 ++- pyproject.toml | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/MANIFEST.in b/MANIFEST.in index 6467bdd..9765c8a 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +1,3 @@ include README.rst LICENSE environment.yml requirements.txt -include versioneer.py \ No newline at end of file +include versioneer.py +include netneurotools/datasets/datasets.json \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 8f9aaba..0e1d026 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,9 +66,15 @@ test = [ requires = ["setuptools", "versioneer[toml]"] build-backend = "setuptools.build_meta" +[tool.setuptools] +include-package-data = true + [tool.setuptools.packages] find = {} +[tool.setuptools.package-data] +"*" = ["*.json"] + [tool.setuptools.dynamic] version = {attr = "netneurotools.__version__"} From 09b9c92965a419fa03bddf5b85d3f52e72ab2351 Mon Sep 17 00:00:00 2001 From: Zhen-Qi Liu Date: Tue, 14 May 2024 19:23:59 -0400 Subject: [PATCH 07/32] Fix doctests --- netneurotools/modularity/modules.py | 14 +++++++------- netneurotools/networks/networks_utils.py | 4 ++-- netneurotools/stats/correlation.py | 14 +++++++------- netneurotools/stats/permutation_test.py | 8 ++++---- netneurotools/stats/regression.py | 4 ++-- 5 files changed, 22 insertions(+), 22 deletions(-) diff --git a/netneurotools/modularity/modules.py b/netneurotools/modularity/modules.py index a1b42fd..120e984 100644 --- a/netneurotools/modularity/modules.py +++ b/netneurotools/modularity/modules.py @@ -70,14 +70,14 @@ def match_cluster_labels(source, target): Examples -------- - >>> from netneurotools import cluster + >>> from netneurotools import modularity When cluster labels are perfectly matched but e.g., inverted the function will find a perfect mapping: >>> a = np.array([1, 1, 1, 0, 0, 0, 0, 0, 0, 0]) >>> b = np.array([0, 0, 0, 1, 1, 1, 1, 1, 1, 1]) - >>> cluster.match_cluster_labels(a, b) + >>> modularity.match_cluster_labels(a, b) array([0, 0, 0, 1, 1, 1, 1, 1, 1, 1]) However, the mapping will work even when cluster assignments between the @@ -86,13 +86,13 @@ def match_cluster_labels(source, target): >>> a = np.array([0, 0, 0, 2, 2, 2, 2, 1, 1, 1]) >>> b = np.array([1, 1, 1, 0, 0, 0, 0, 0, 0, 0]) - >>> cluster.match_cluster_labels(a, b) + >>> 
modularity.match_cluster_labels(a, b) array([1, 1, 1, 0, 0, 0, 0, 2, 2, 2]) If the source assignment has fewer clusters than the target the returned values may be discontinuous: - >>> cluster.match_cluster_labels(b, a) + >>> modularity.match_cluster_labels(b, a) array([0, 0, 0, 2, 2, 2, 2, 2, 2, 2]) """ # try and match the source to target @@ -143,7 +143,7 @@ def match_assignments(assignments, target=None, seed=None): Examples -------- - >>> from netneurotools import cluster + >>> from netneurotools import modularity First we can construct a matrix of `N` samples clustered `M` times (in this case, `M` is three) . Since cluster labels are generally arbitrary we can @@ -163,7 +163,7 @@ def match_assignments(assignments, target=None, seed=None): of the columns will be randomly picked as the "target" solution, we provide a `seed` to ensure reproducibility in the selection: - >>> cluster.match_assignments(assignments, seed=1234) + >>> modularity.match_assignments(assignments, seed=1234) array([[1, 1, 1], [1, 1, 1], [1, 1, 1], @@ -185,7 +185,7 @@ def match_assignments(assignments, target=None, seed=None): ... [1, 2, 0], ... [1, 1, 2], ... [1, 1, 2]]) - >>> cluster.match_assignments(assignments) + >>> modularity.match_assignments(assignments) array([[0, 0, 0], [0, 0, 0], [0, 0, 0], diff --git a/netneurotools/networks/networks_utils.py b/netneurotools/networks/networks_utils.py index 98925b5..5f72c50 100644 --- a/netneurotools/networks/networks_utils.py +++ b/netneurotools/networks/networks_utils.py @@ -22,10 +22,10 @@ def get_triu(data, k=1): Examples -------- - >>> from netneurotools import utils + >>> from netneurotools import networks >>> X = np.array([[1, 0.5, 0.25], [0.5, 1, 0.33], [0.25, 0.33, 1]]) - >>> tri = utils.get_triu(X) + >>> tri = networks.get_triu(X) >>> tri array([0.5 , 0.25, 0.33]) """ diff --git a/netneurotools/stats/correlation.py b/netneurotools/stats/correlation.py index 71bb79e..38489fd 100644 --- a/netneurotools/stats/correlation.py +++ b/netneurotools/stats/correlation.py @@ -42,13 +42,13 @@ def efficient_pearsonr(a, b, ddof=1, nan_policy='propagate'): Examples -------- - >>> from netneurotools import datasets, stats + >>> from netneurotools import stats Generate some not-very-correlated and some highly-correlated data: >>> np.random.seed(12345678) # set random seed for reproducible results - >>> x1, y1 = datasets.make_correlated_xy(corr=0.1, size=100) - >>> x2, y2 = datasets.make_correlated_xy(corr=0.8, size=100) + >>> x1, y1 = stats.make_correlated_xy(corr=0.1, size=100) + >>> x2, y2 = stats.make_correlated_xy(corr=0.8, size=100) Calculate both correlations simultaneously: @@ -129,15 +129,15 @@ def make_correlated_xy(corr=0.85, size=10000, seed=None, tol=0.001): Examples -------- - >>> from netneurotools import datasets + >>> from netneurotools import stats By default two vectors are generated with specified correlation - >>> x, y = datasets.make_correlated_xy() + >>> x, y = stats.make_correlated_xy() >>> np.corrcoef(x, y) # doctest: +SKIP array([[1. , 0.85083661], [0.85083661, 1. ]]) - >>> x, y = datasets.make_correlated_xy(corr=0.2) + >>> x, y = stats.make_correlated_xy(corr=0.2) >>> np.corrcoef(x, y) # doctest: +SKIP array([[1. , 0.20069953], [0.20069953, 1. 
]]) @@ -147,7 +147,7 @@ def make_correlated_xy(corr=0.85, size=10000, seed=None, tol=0.001): correlations are close to the desired values: >>> corr = [[1, 0.5, 0.3], [0.5, 1, 0], [0.3, 0, 1]] - >>> out = datasets.make_correlated_xy(corr=corr) + >>> out = stats.make_correlated_xy(corr=corr) >>> out.shape (3, 10000) >>> np.corrcoef(out) # doctest: +SKIP diff --git a/netneurotools/stats/permutation_test.py b/netneurotools/stats/permutation_test.py index bcd11ac..147e4d1 100644 --- a/netneurotools/stats/permutation_test.py +++ b/netneurotools/stats/permutation_test.py @@ -226,14 +226,14 @@ def permtest_pearsonr(a, b, axis=0, n_perm=1000, resamples=None, seed=0): Examples -------- - >>> from netneurotools import datasets, stats + >>> from netneurotools import stats >>> np.random.seed(12345678) # set random seed for reproducible results - >>> x, y = datasets.make_correlated_xy(corr=0.1, size=100) + >>> x, y = stats.make_correlated_xy(corr=0.1, size=100) >>> stats.permtest_pearsonr(x, y) # doctest: +SKIP (0.10032564626876286, 0.3046953046953047) - >>> x, y = datasets.make_correlated_xy(corr=0.5, size=100) + >>> x, y = stats.make_correlated_xy(corr=0.5, size=100) >>> stats.permtest_pearsonr(x, y) # doctest: +SKIP (0.500040365781984, 0.000999000999000999) @@ -247,7 +247,7 @@ def permtest_pearsonr(a, b, axis=0, n_perm=1000, resamples=None, seed=0): or by using matching columns in the two arrays (e.g., `x` and `y` vs `a` and `b`): - >>> a, b = datasets.make_correlated_xy(corr=0.9, size=100) + >>> a, b = stats.make_correlated_xy(corr=0.9, size=100) >>> stats.permtest_pearsonr(np.column_stack([x, a]), np.column_stack([y, b])) (array([0.50004037, 0.89927523]), array([0.000999, 0.000999])) """ # noqa diff --git a/netneurotools/stats/regression.py b/netneurotools/stats/regression.py index b6b4c2c..c6ac6e2 100644 --- a/netneurotools/stats/regression.py +++ b/netneurotools/stats/regression.py @@ -26,10 +26,10 @@ def _add_constant(data): Examples -------- - >>> from netneurotools import utils + >>> from netneurotools import stats >>> A = np.zeros((5, 5)) - >>> Ac = utils.add_constant(A) + >>> Ac = stats._add_constant(A) >>> Ac array([[0., 0., 0., 0., 0., 1.], [0., 0., 0., 0., 0., 1.], From 0321994f9c91de4a329fecf540f48248d4ec0001 Mon Sep 17 00:00:00 2001 From: Zhen-Qi Liu Date: Tue, 21 May 2024 22:30:55 -0400 Subject: [PATCH 08/32] Add test for custom colormaps --- netneurotools/plotting/tests/test_color_utils.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/netneurotools/plotting/tests/test_color_utils.py b/netneurotools/plotting/tests/test_color_utils.py index cbf03e7..3bd55c1 100644 --- a/netneurotools/plotting/tests/test_color_utils.py +++ b/netneurotools/plotting/tests/test_color_utils.py @@ -1 +1,10 @@ """For testing netneurotools.plotting.color_utils functionality.""" + + +def test_register_cmaps(): + """Test registering colormaps.""" + import matplotlib + if "justine" in matplotlib.colormaps: + assert True + else: + assert False From a4747f25f2fa4767416e76084d962bad0bad4609 Mon Sep 17 00:00:00 2001 From: Zhen-Qi Liu Date: Tue, 21 May 2024 22:31:14 -0400 Subject: [PATCH 09/32] Fix doc config --- docs/conf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/conf.py b/docs/conf.py index 37ee2e5..6ac2598 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -47,6 +47,7 @@ autodoc_default_options = {'members': True, 'inherited-members': True} numpydoc_show_class_members = False autoclass_content = "class" +napoleon_use_param = False # Add any paths that contain templates here, relative to 
this directory. templates_path = ['_templates'] From 63b5106dfc897ed85e4b4bd469318e714090c0b9 Mon Sep 17 00:00:00 2001 From: Zhen-Qi Liu Date: Tue, 21 May 2024 22:31:40 -0400 Subject: [PATCH 10/32] Refactor datasets module --- netneurotools/datasets/datasets.json | 273 ++++------------ netneurotools/datasets/datasets_utils.py | 177 ++++++++--- netneurotools/datasets/fetch_atlas.py | 336 +++++++++++--------- netneurotools/datasets/fetch_project.py | 257 ++++++++++----- netneurotools/datasets/fetch_template.py | 330 +++++++++++--------- netneurotools/datasets/netneurotools.bib | 253 +++++++++++++++ netneurotools/datasets/references.json | 220 +++++++++++++ netneurotools/datasets/tests/test_fetch.py | 345 ++++++++++++--------- 8 files changed, 1435 insertions(+), 756 deletions(-) create mode 100644 netneurotools/datasets/netneurotools.bib create mode 100644 netneurotools/datasets/references.json diff --git a/netneurotools/datasets/datasets.json b/netneurotools/datasets/datasets.json index d68669c..e9fdd66 100644 --- a/netneurotools/datasets/datasets.json +++ b/netneurotools/datasets/datasets.json @@ -1,6 +1,7 @@ { "atl-cammoun2012": { "gcs": { + "url-type": "osf", "url": [ "mb37e", "5ce6bb4423fec40017e82c5e" @@ -8,6 +9,7 @@ "md5": "266c4520af768e766328fb8e6648005d" }, "fsaverage": { + "url-type": "osf", "url": [ "mb37e", "5ce6c30523fec40017e83439" @@ -15,6 +17,7 @@ "md5": "2a19eb4744c0ce6c243f721bd43ecff0" }, "fsaverage5": { + "url-type": "osf", "url": [ "mb37e", "5e189a1c57341903868036dd" @@ -22,6 +25,7 @@ "md5": "2afb22e1887d47f1ca81c340fff7692b" }, "fsaverage6": { + "url-type": "osf", "url": [ "mb37e", "5e189a1b5734190380804072" @@ -29,6 +33,7 @@ "md5": "1df743bff13316f67bd41d13ec691c97" }, "MNI152NLin2009aSym": { + "url-type": "osf", "url": [ "mb37e", "5e2f4bf0e71ef800301880c2" @@ -36,6 +41,7 @@ "md5": "9da30bad22d732aa5f00a6d178d087c4" }, "fslr32k": { + "url-type": "osf", "url": [ "mb37e", "5e2f4bf1e71ef80027189c56" @@ -43,33 +49,40 @@ "md5": "a5177319d5e0b8825a91d503ded1a59e" } }, - "atl-pauli2018": [ - { + "atl-pauli2018": { + "probabilistic": { + "url-type": "osf", "url": [ "jkzwp", "5b11fa3364f25a001973dce0" ], "md5": "62dd6ff405d3a8b89ee188cafa3a7f6a", - "name": "atl-pauli2018/atl-Pauli2018_space-MNI152NLin2009cAsym_hemi-both_probabilistic.nii.gz" + "folder-name": "atl-pauli2018", + "file-name": "atl-Pauli2018_space-MNI152NLin2009cAsym_hemi-both_probabilistic.nii.gz" }, - { + "deterministic": { + "url-type": "osf", "url": [ "jkzwp", "5b11fa2ff1f288000e625a7f" ], "md5": "5a5b6246921be08456304875447c68ed", - "name": "atl-pauli2018/atl-Pauli2018_space-MNI152NLin2009cAsym_hemi-both_deterministic.nii.gz" + "folder-name": "atl-pauli2018", + "file-name": "atl-Pauli2018_space-MNI152NLin2009cAsym_hemi-both_deterministic.nii.gz" }, - { + "info": { + "url-type": "osf", "url": [ "mb37e", "5c93b4f034062c001b1ef50d" ], "md5": "390a693abeb1a583151f30aa8798bab5", - "name": "atl-pauli2018/atl-Pauli2018_space-MNI152NLin2009cAsym_info.csv" + "folder-name": "atl-pauli2018", + "file-name": "atl-Pauli2018_space-MNI152NLin2009cAsym_info.csv" } - ], + }, "tpl-conte69": { + "url-type": "osf", "url": [ "fvuh8", "5b198ec5ec24e20011b48548" @@ -77,6 +90,7 @@ "md5": "bd944e3f9f343e0e51e562b440960529" }, "tpl-yerkes19": { + "url-type": "osf", "url": [ "mb37e", "60ae93d504e91a005f1761ab" @@ -85,6 +99,7 @@ }, "tpl-fsaverage": { "fsaverage": { + "url-type": "osf", "url": [ "mb37e", "5c82830a1d73810018bdacea" @@ -92,6 +107,7 @@ "md5": "1e82c52ed21d06d4e6e7341c725c5262" }, "fsaverage3": { + "url-type": "osf", "url": [ 
"mb37e", "5d9f83b6f6b03e000e1ba285" @@ -99,6 +115,7 @@ "md5": "b4182495d341364e3f7c5b86284d8d20" }, "fsaverage4": { + "url-type": "osf", "url": [ "mb37e", "5d9f83b7fcf91f00111c7473" @@ -106,6 +123,7 @@ "md5": "5a481421dc1286c7bd9b8a47db5fad0b" }, "fsaverage5": { + "url-type": "osf", "url": [ "mb37e", "5d9f83b6f6b03e00101c932f" @@ -113,6 +131,7 @@ "md5": "cc75f7290c03970a8b8a06dfc215e925" }, "fsaverage6": { + "url-type": "osf", "url": [ "mb37e", "5d9f83b7a7bc73000cea05f1" @@ -123,6 +142,7 @@ "tpl-civet": { "v1": { "civet41k": { + "url-type": "osf", "url": [ "mb37e", "601daffd84ecf800fe031868" @@ -132,6 +152,7 @@ }, "v2": { "civet41k": { + "url-type": "osf", "url": [ "mb37e", "601dafe77ad0a80119d9483c" @@ -139,6 +160,7 @@ "md5": "a47b015e471c6a800d236f107fda5b4a" }, "civet164k": { + "url-type": "osf", "url": [ "mb37e", "601dafe87ad0a8011ad94938" @@ -147,209 +169,16 @@ } } }, - "ds-famous-gmat": { - "celegans": { - "url": [ - "mb37e", - "5d9b8e4aa7bc73000be65508" - ], - "md5": "f35cd893bc1aff4e8184a528fcda14b9", - "keys": [ - "conn", - "dist", - "labels" - ] - }, - "drosophila": { - "url": [ - "mb37e", - "5d9b8e4aa7bc73000ce65d00" - ], - "md5": "6a67a4fc1b4f35b72c42cca4d0827249", - "keys": [ - "conn", - "coords", - "labels", - "networks" - ] - }, - "human_func_scale033": { - "url": [ - "mb37e", - "5d9b8e4afcf91f000f18f57b" - ], - "md5": "1988ab427d9bc0de075bbe600ce0a27f", - "keys": [ - "conn", - "coords", - "labels" - ] - }, - "human_func_scale060": { - "url": [ - "mb37e", - "5d9b8e4aa7bc73000de67117" - ], - "md5": "4191f5a2b0c5063dcba9935ea0ef0bfe", - "keys": [ - "conn", - "coords", - "labels" - ] - }, - "human_func_scale125": { - "url": [ - "mb37e", - "5d9b8e4b26eb50000e78c987" - ], - "md5": "533e11cf9fea67d536648c9ef939a5f5", - "keys": [ - "conn", - "coords", - "labels" - ] - }, - "human_func_scale250": { - "url": [ - "mb37e", - "5d9b8e4efcf91f0012190ba1" - ], - "md5": "4abc7324c2a9ae04ef6cf5555149b3f4", - "keys": [ - "conn", - "coords", - "labels" - ] - }, - "human_func_scale500": { - "url": [ - "mb37e", - "5d9b8e4ff6b03e000d18b5a1" - ], - "md5": "637c6057476b2508f15f244d528e156d", - "keys": [ - "conn", - "coords", - "labels" - ] - }, - "human_struct_scale033": { - "url": [ - "mb37e", - "5d9b8e4f26eb50000e78c993" - ], - "md5": "27a2101f2f04e0fc8de09a8248793235", - "keys": [ - "conn", - "coords", - "dist", - "labels" - ] - }, - "human_struct_scale060": { - "url": [ - "mb37e", - "5d9b8e4da7bc73000be6550e" - ], - "md5": "9289265ab1bd0fa18611eeaf1afce745", - "keys": [ - "conn", - "coords", - "dist", - "labels" - ] - }, - "human_struct_scale125": { - "url": [ - "mb37e", - "5d9b8e50f6b03e000e18aa37" - ], - "md5": "07e60b141809babe8c2645d93cd24984", - "keys": [ - "conn", - "coords", - "dist", - "labels" - ] - }, - "human_struct_scale250": { - "url": [ - "mb37e", - "5d9b8e51fcf91f001118fdc2" - ], - "md5": "56f9ca8b4ecc63ef9aaf64a606755c09", - "keys": [ - "conn", - "coords", - "dist", - "labels" - ] - }, - "human_struct_scale500": { - "url": [ - "mb37e", - "5d9b8e51a7bc73000ee65769" - ], - "md5": "94724e0446f8cb06207a4521ba1df20f", - "keys": [ - "conn", - "coords", - "dist", - "labels" - ] - }, - "macaque_markov": { - "url": [ - "mb37e", - "5d9b8e56a7bc73000ce65d11" - ], - "md5": "5ce43182afc9c4f779db2c0306afb202", - "keys": [ - "conn", - "dist", - "labels" - ] - }, - "macaque_modha": { - "url": [ - "mb37e", - "5d9b8e5626eb50000d78abd0" - ], - "md5": "f467c62b2670feaf75c93d90d5ed5de6", - "keys": [ - "conn", - "coords", - "dist", - "labels" - ] - }, - "mouse": { - "url": [ - "mb37e", - 
"5d9b8e5626eb50000e78c9a0" - ], - "md5": "dba5cbbb9e72c1cacda945086d77a125", - "keys": [ - "conn", - "coords", - "dist", - "labels", - "acronyms" - ] - }, - "rat": { - "url": [ - "mb37e", - "5d9b8e56f6b03e000f18d06f" - ], - "md5": "9e1f12ce4fa42082a76d62f89670f5d0", - "keys": [ - "conn", - "labels" - ] - } + "ds-famous_gmat": { + "url-type": "osf", + "url": [ + "mb37e", + "664683ca4664da9ebced6b70" + ], + "md5": "b803de1058579881a759f475704e9f35" }, "ds-vazquez_rodriguez2019": { + "url-type": "osf", "url": [ "mb37e", "5d9f5aa4f6b03e000e1b819e" @@ -358,6 +187,7 @@ }, "atl-schaefer2018": { "fsaverage": { + "url-type": "osf", "url": [ "mb37e", "5dbc8d7dcfc96c000dc3581c" @@ -365,6 +195,7 @@ "md5": "74dfe4237efaccabf057897c49e8af94" }, "fsaverage5": { + "url-type": "osf", "url": [ "mb37e", "5dbc8d7daf84c3000eebffb2" @@ -372,6 +203,7 @@ "md5": "45a8c784f1979eb33a119bdab912a51f" }, "fsaverage6": { + "url-type": "osf", "url": [ "mb37e", "5dbc8d7bcfc96c000ec6dca2" @@ -379,6 +211,7 @@ "md5": "8738daccab4648c3e891a1c8d3a9ec1f" }, "fslr32k": { + "url-type": "osf", "url": [ "mb37e", "5e3086e4af75930094bdd507" @@ -388,6 +221,7 @@ }, "atl-mmpall": { "fslr32k": { + "url-type": "osf", "url": [ "mb37e", "6047bac259e910009b83114f" @@ -396,6 +230,7 @@ } }, "atl-voneconomo_koskinas": { + "url-type": "osf", "url": [ "mb37e", "5ed80005fabc45000d639900" @@ -404,11 +239,29 @@ }, "tpl-hcp_standards": { "standard_mesh_atlases": { + "url-type": "osf", "url": [ "mb37e", "6643d2ab2eacc48a57097091" ], - "md5": "806abac71f76b8dba8af467ef313c3f7" + "md5": "806abac71f76b8dba8af467ef313c3f7", + "keys": [ + "fs_LR_32k", + "fsaverage", + "fsaverage5", + "fsaverage6", + "MNI152NLin2009cAsym" + ] } + }, + "ds-hansen_manynetworks": { + "url-type": "github-release", + "url": [ + "netneurolab", + "hansen_many_networks", + "v1.0.0" + ], + "folder-name": "hansen_many_networks-1.0.0", + "md5": "9e503c759506293aa441054cfd206ccc" } } diff --git a/netneurotools/datasets/datasets_utils.py b/netneurotools/datasets/datasets_utils.py index 42b4920..a6e9cf5 100644 --- a/netneurotools/datasets/datasets_utils.py +++ b/netneurotools/datasets/datasets_utils.py @@ -6,12 +6,6 @@ from collections import namedtuple import importlib.resources -if getattr(importlib.resources, 'files', None) is not None: - _importlib_avail = True -else: - from pkg_resources import resource_filename - _importlib_avail = False - SURFACE = namedtuple('Surface', ('lh', 'rh')) @@ -20,7 +14,32 @@ ] -def _osfify_urls(data): +def _get_data_dir(data_dir=None): + """ + Get path to netneurotools data directory. + + Parameters + ---------- + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + + Returns + ------- + data_dir : str + Path to use as data directory + """ + if data_dir is None: + data_dir = os.environ.get('NNT_DATA', os.path.join('~', 'nnt-data')) + data_dir = os.path.expanduser(data_dir) + if not os.path.exists(data_dir): + os.makedirs(data_dir) + + return data_dir + + +def _decode_urls(data): """ Format `data` object with OSF API URL. 
@@ -35,29 +54,53 @@ def _osfify_urls(data): Input data with all `url` dict keys formatted """ OSF_API = "https://files.osf.io/v1/resources/{}/providers/osfstorage/{}" + GITHUB_RELEASE_API = "https://github.com/{}/{}/archive/refs/tags/{}.tar.gz" - if isinstance(data, str): + if isinstance(data, str) or isinstance(data, list): return data elif 'url' in data: - data['url'] = OSF_API.format(*data['url']) + if data['url-type'] == 'osf': + data['url'] = OSF_API.format(*data['url']) + elif data['url-type'] == 'github-release': + data['url'] = GITHUB_RELEASE_API.format(*data['url']) + else: + raise ValueError("URL type {} not recognized".format(data['url-type'])) - try: - for key, value in data.items(): - data[key] = _osfify_urls(value) - except AttributeError: - for n, value in enumerate(data): - data[n] = _osfify_urls(value) + for key, value in data.items(): + data[key] = _decode_urls(value) return data -if _importlib_avail: - osf = importlib.resources.files("netneurotools") / "datasets/datasets.json" -else: - osf = resource_filename('netneurotools', 'datasets/datasets.json') +def _load_resource_json(relative_path): + """ + Load JSON file from package resources. + + Parameters + ---------- + relative_path : str + Path to JSON file relative to package resources + + Returns + ------- + resource_json : dict + JSON file loaded as a dictionary + """ + # handling pkg_resources.resource_filename deprecation + if getattr(importlib.resources, 'files', None) is not None: + f_resource = importlib.resources.files("netneurotools") / relative_path + else: + from pkg_resources import resource_filename + f_resource = resource_filename('netneurotools', relative_path) + + with open(f_resource) as src: + resource_json = json.load(src) + + return resource_json + -with open(osf) as src: - OSF_RESOURCES = _osfify_urls(json.load(src)) +NNT_DATASETS = _load_resource_json('datasets/datasets.json') +NNT_DATASETS = _decode_urls(NNT_DATASETS) def _get_dataset_info(name): @@ -77,35 +120,95 @@ def _get_dataset_info(name): MD5 checksum for file downloade from `url` """ try: - return OSF_RESOURCES[name] + return NNT_DATASETS[name] except KeyError: raise KeyError("Provided dataset '{}' is not valid. Must be one of: {}" - .format(name, sorted(OSF_RESOURCES.keys()))) from None + .format(name, sorted(NNT_DATASETS.keys()))) from None -def _get_data_dir(data_dir=None): +NNT_REFERENCES = _load_resource_json('datasets/references.json') + + +def _get_reference_info(name, verbose=1, return_dict=False): """ - Get path to netneurotools data directory. + Return reference information for dataset `name`. Parameters ---------- - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. 
Default: None + name : str + Name of dataset Returns ------- - data_dir : str - Path to use as data directory + reference : str + Reference information for dataset """ - if data_dir is None: - data_dir = os.environ.get('NNT_DATA', os.path.join('~', 'nnt-data')) - data_dir = os.path.expanduser(data_dir) - if not os.path.exists(data_dir): - os.makedirs(data_dir) + try: + curr_refs = NNT_REFERENCES[name] + if verbose: + print("Please cite the following papers if you are using this function:") + for bib_category, bib_category_items in curr_refs.items(): + print(f" [{bib_category}]:") + for bib_item in bib_category_items: + print(f" {bib_item['citation']}") - return data_dir + if return_dict: + return curr_refs + + except KeyError: + raise KeyError("Provided dataset '{}' is not valid. Must be one of: {}" + .format(name, sorted(NNT_REFERENCES.keys()))) from None + + +def _fill_reference_json(bib_file, json_file, overwrite=False, use_defaults=False): + """ + Fill in citation information for references in a JSON file. + + For internal use only. + + Parameters + ---------- + bib_file : str + Path to BibTeX file containing references + json_file : str + Path to JSON file containing references + overwrite : bool, optional + Whether to overwrite existing citation information. Default: False + use_defaults : bool, optional + Whether to use default paths for `bib_file` and `json_file`. Default: False + + Returns + ------- + None + """ + if use_defaults: + bib_file = \ + importlib.resources.files("netneurotools") / "datasets/netneurotools.bib" + json_file = \ + importlib.resources.files("netneurotools") / "datasets/references.json" + + from pybtex import PybtexEngine + engine = PybtexEngine() + + def _get_citation(key): + s = engine.format_from_file( + filename=bib_file, style="unsrt", + citations=[key], output_backend="plaintext" + ) + return s.strip("\n").replace("[1] ", "") + + with open(json_file) as src: + nnt_refs = json.load(src) + + for _, value in nnt_refs.items(): + for bib_category in value: + for bib_item in value[bib_category]: + if bib_item["bibkey"] not in ["", None]: + if bib_item["citation"] == "" or overwrite: + bib_item["citation"] = _get_citation(bib_item["bibkey"]) + + with open(json_file, "w") as dst: + json.dump(nnt_refs, dst, indent=4) def _check_freesurfer_subjid(subject_id, subjects_dir=None): diff --git a/netneurotools/datasets/fetch_atlas.py b/netneurotools/datasets/fetch_atlas.py index 56571cc..2b3feec 100644 --- a/netneurotools/datasets/fetch_atlas.py +++ b/netneurotools/datasets/fetch_atlas.py @@ -1,6 +1,5 @@ """Functions for fetching atlas data.""" import itertools -import os.path as op import warnings try: @@ -13,15 +12,21 @@ from .datasets_utils import ( SURFACE, - _get_data_dir, _get_dataset_info + _get_data_dir, _get_dataset_info, _get_reference_info ) -def fetch_cammoun2012(version='MNI152NLin2009aSym', data_dir=None, url=None, - resume=True, verbose=1): +def fetch_cammoun2012( + version='MNI152NLin2009aSym', + data_dir=None, resume=True, verbose=1 + ): """ Download files for Cammoun et al., 2012 multiscale parcellation. + This dataset contains + + If you used this data, please cite 1_. + Parameters ---------- version : str, optional @@ -32,18 +37,6 @@ def fetch_cammoun2012(version='MNI152NLin2009aSym', data_dir=None, url=None, fs_LR_32k HCP space, and 'gcs' will return FreeSurfer-style .gcs probabilistic atlas files for generating new, subject-specific parcellations. Default: 'MNI152NLin2009aSym' - data_dir : str, optional - Path to use as data directory. 
If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. - Default: 1 Returns ------- @@ -52,16 +45,29 @@ def fetch_cammoun2012(version='MNI152NLin2009aSym', data_dir=None, url=None, 'scale250', 'scale500'], where corresponding values are lists of filepaths to downloaded parcellation files. - References - ---------- - Cammoun, L., Gigandet, X., Meskaldji, D., Thiran, J. P., Sporns, O., Do, K. - Q., Maeder, P., and Meuli, R., & Hagmann, P. (2012). Mapping the human - connectome at multiple scales with diffusion spectrum MRI. Journal of - Neuroscience Methods, 203(2), 386-397. + Other Parameters + ---------------- + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + resume : bool, optional + Whether to attempt to resume partial download, if possible. Default: True + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 Notes ----- License: https://raw.githubusercontent.com/LTS5/cmp/master/COPYRIGHT + + References + ---------- + .. [1] Leila Cammoun, Xavier Gigandet, Djalel Meskaldji, Jean Philippe + Thiran, Olaf Sporns, Kim Q Do, Philippe Maeder, Reto Meuli, and Patric + Hagmann. Mapping the human connectome at multiple scales with diffusion + spectrum mri. Journal of neuroscience methods, 203(2):386\u2013397, + 2012. 
""" if version == 'surface': warnings.warn('Providing `version="surface"` is deprecated and will ' @@ -87,13 +93,12 @@ def fetch_cammoun2012(version='MNI152NLin2009aSym', data_dir=None, url=None, .format(version, versions)) dataset_name = 'atl-cammoun2012' + _get_reference_info(dataset_name, verbose=verbose) + keys = ['scale033', 'scale060', 'scale125', 'scale250', 'scale500'] data_dir = _get_data_dir(data_dir=data_dir) info = _get_dataset_info(dataset_name)[version] - if url is None: - url = info['url'] - opts = { 'uncompress': True, 'md5sum': info['md5'], @@ -102,34 +107,39 @@ def fetch_cammoun2012(version='MNI152NLin2009aSym', data_dir=None, url=None, # filenames differ based on selected version of dataset if version == 'MNI152NLin2009aSym': - filenames = [ - 'atl-Cammoun2012_space-MNI152NLin2009aSym_res-{}_deterministic{}' - .format(res[-3:], suff) for res in keys for suff in ['.nii.gz'] - ] + ['atl-Cammoun2012_space-MNI152NLin2009aSym_info.csv'] + _filenames = [ + f'{dataset_name}/{version}/' + f'atl-Cammoun2012_space-MNI152NLin2009aSym_res-{res[-3:]}' + f'_deterministic{suff}' + for res in keys for suff in ['.nii.gz'] + ] + [ + f'{dataset_name}/{version}/' + f'atl-Cammoun2012_space-MNI152NLin2009aSym_info.csv' + ] elif version == 'fslr32k': - filenames = [ - 'atl-Cammoun2012_space-fslr32k_res-{}_hemi-{}_deterministic{}' - .format(res[-3:], hemi, suff) for res in keys - for hemi in ['L', 'R'] for suff in ['.label.gii'] + _filenames = [ + f'{dataset_name}/{version}/' + f'atl-Cammoun2012_space-fslr32k_res-{res[-3:]}_hemi-{hemi}' + f'_deterministic{suff}' + for res in keys for hemi in ['L', 'R'] for suff in ['.label.gii'] ] elif version in ('fsaverage', 'fsaverage5', 'fsaverage6'): - filenames = [ - 'atl-Cammoun2012_space-{}_res-{}_hemi-{}_deterministic{}' - .format(version, res[-3:], hemi, suff) for res in keys - for hemi in ['L', 'R'] for suff in ['.annot'] + _filenames = [ + f'{dataset_name}/{version}/' + f'atl-Cammoun2012_space-{version}_res-{res[-3:]}_hemi-{hemi}' + f'_deterministic{suff}' + for res in keys for hemi in ['L', 'R'] for suff in ['.annot'] ] else: - filenames = [ - 'atl-Cammoun2012_res-{}_hemi-{}_probabilistic{}' - .format(res[5:], hemi, suff) + _filenames = [ + f'{dataset_name}/{version}/' + f'atl-Cammoun2012_res-{res[5:]}_hemi-{hemi}' + f'_probabilistic{suff}' for res in keys[:-1] + ['scale500v1', 'scale500v2', 'scale500v3'] for hemi in ['L', 'R'] for suff in ['.gcs', '.ctab'] ] - - files = [ - (op.join(dataset_name, version, f), url, opts) for f in filenames - ] - data = fetch_files(data_dir, files=files, resume=resume, verbose=verbose) + _files = [(f, info['url'], opts) for f in _filenames] + data = fetch_files(data_dir, files=_files, resume=resume, verbose=verbose) if version == 'MNI152NLin2009aSym': keys += ['info'] @@ -143,45 +153,52 @@ def fetch_cammoun2012(version='MNI152NLin2009aSym', data_dir=None, url=None, return Bunch(**dict(zip(keys, data))) -def fetch_schaefer2018(version='fsaverage', data_dir=None, url=None, - resume=True, verbose=1): +def fetch_schaefer2018( + version='fsaverage', + data_dir=None, resume=True, verbose=1 + ): """ Download FreeSurfer .annot files for Schaefer et al., 2018 parcellation. + This dataset contains + + If you used this data, please cite 1_. + Parameters ---------- version : {'fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k'} Specifies which surface annotation files should be matched to. 
Default: 'fsaverage' + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Dictionary-like object with keys of format '{}Parcels{}Networks' where + corresponding values are the left/right hemisphere annotation files + + Other Parameters + ---------------- data_dir : str, optional Path to use as data directory. If not specified, will check for environmental variable 'NNT_DATA'; if that is not set, will use `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True + Whether to attempt to resume partial download, if possible. Default: True verbose : int, optional Modifies verbosity of download, where higher numbers mean more updates. Default: 1 - Returns - ------- - filenames : :class:`sklearn.utils.Bunch` - Dictionary-like object with keys of format '{}Parcels{}Networks' where - corresponding values are the left/right hemisphere annotation files - - References - ---------- - Schaefer, A., Kong, R., Gordon, E. M., Laumann, T. O., Zuo, X. N., Holmes, - A. J., ... & Yeo, B. T. (2017). Local-global parcellation of the human - cerebral cortex from intrinsic functional connectivity MRI. Cerebral - Cortex, 28(9), 3095-3114. - Notes ----- License: https://github.com/ThomasYeoLab/CBIG/blob/master/LICENSE.md + + References + ---------- + .. [1] Alexander Schaefer, Ru Kong, Evan M Gordon, Timothy O Laumann, + Xi-Nian Zuo, Avram J Holmes, Simon B Eickhoff, and BT Thomas Yeo. + Local-global parcellation of the human cerebral cortex from intrinsic + functional connectivity mri. Cerebral cortex, 28(9):3095\u20133114, + 2018. """ versions = ['fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k'] if version not in versions: @@ -190,6 +207,8 @@ def fetch_schaefer2018(version='fsaverage', data_dir=None, url=None, .format(version, versions)) dataset_name = 'atl-schaefer2018' + _get_reference_info(dataset_name, verbose=verbose) + keys = [ '{}Parcels{}Networks'.format(p, n) for p in range(100, 1001, 100) for n in [7, 17] @@ -197,9 +216,6 @@ def fetch_schaefer2018(version='fsaverage', data_dir=None, url=None, data_dir = _get_data_dir(data_dir=data_dir) info = _get_dataset_info(dataset_name)[version] - if url is None: - url = info['url'] - opts = { 'uncompress': True, 'md5sum': info['md5'], @@ -210,15 +226,17 @@ def fetch_schaefer2018(version='fsaverage', data_dir=None, url=None, hemispheres, suffix = ['LR'], 'dlabel.nii' else: hemispheres, suffix = ['L', 'R'], 'annot' - filenames = [ - 'atl-Schaefer2018_space-{}_hemi-{}_desc-{}_deterministic.{}' - .format(version, hemi, desc, suffix) + + _filenames = [ + f'{dataset_name}/{version}/' + f'atl-Schaefer2018_space-{version}_hemi-{hemi}_desc-{desc}' + f'_deterministic.{suffix}' for desc in keys for hemi in hemispheres ] - files = [(op.join(dataset_name, version, f), url, opts) - for f in filenames] - data = fetch_files(data_dir, files=files, resume=resume, verbose=verbose) + _files = [(f, info['url'], opts) for f in _filenames] + + data = fetch_files(data_dir, files=_files, resume=resume, verbose=verbose) if suffix == 'annot': data = [SURFACE(*data[i:i + 2]) for i in range(0, len(keys) * 2, 2)] @@ -226,45 +244,51 @@ def fetch_schaefer2018(version='fsaverage', data_dir=None, url=None, return Bunch(**dict(zip(keys, data))) -def fetch_mmpall(version='fslr32k', data_dir=None, url=None, resume=True, - verbose=1): +def fetch_mmpall( + version='fslr32k', + data_dir=None, resume=True, verbose=1 + ): """ Download 
.label.gii files for Glasser et al., 2016 MMPAll atlas. + This dataset contains + + If you used this data, please cite 1_. + Parameters ---------- version : {'fslr32k'} Specifies which surface annotation files should be matched to. Default: 'fslr32k' + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Namedtuple with fields ('lh', 'rh') corresponding to filepaths to + left/right hemisphere parcellation files + + Other Parameters + ---------------- data_dir : str, optional Path to use as data directory. If not specified, will check for environmental variable 'NNT_DATA'; if that is not set, will use `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True + Whether to attempt to resume partial download, if possible. Default: True verbose : int, optional Modifies verbosity of download, where higher numbers mean more updates. Default: 1 - Returns - ------- - filenames : :class:`sklearn.utils.Bunch` - Namedtuple with fields ('lh', 'rh') corresponding to filepaths to - left/right hemisphere parcellation files + Notes + ----- + License: https://www.humanconnectome.org/study/hcp-young-adult/document/wu-minn-hcp-consortium-open-access-data-use-terms References ---------- - Glasser, M. F., Coalson, T. S., Robinson, E. C., Hacker, C. D., Harwell, - J., Yacoub, E., ... & Van Essen, D. C. (2016). A multi-modal parcellation - of human cerebral cortex. Nature, 536(7615), 171-178. - - Notes - ----- - License: https://www.humanconnectome.org/study/hcp-young-adult/document/ - wu-minn-hcp-consortium-open-access-data-use-terms + .. [1] Matthew F Glasser, Timothy S Coalson, Emma C Robinson, Carl D Hacker, + John Harwell, Essa Yacoub, Kamil Ugurbil, Jesper Andersson, Christian F + Beckmann, Mark Jenkinson, and others. A multi-modal parcellation of + human cerebral cortex. Nature, 536(7615):171\u2013178, 2016. """ versions = ['fslr32k'] if version not in versions: @@ -273,77 +297,85 @@ def fetch_mmpall(version='fslr32k', data_dir=None, url=None, resume=True, .format(version, versions)) dataset_name = 'atl-mmpall' + _get_reference_info(dataset_name, verbose=verbose) data_dir = _get_data_dir(data_dir=data_dir) info = _get_dataset_info(dataset_name)[version] - if url is None: - url = info['url'] opts = { 'uncompress': True, 'md5sum': info['md5'], 'move': '{}.tar.gz'.format(dataset_name) } - hemispheres = ['L', 'R'] - filenames = [ - 'atl-MMPAll_space-{}_hemi-{}_deterministic.label.gii' - .format(version, hemi) for hemi in hemispheres + _filenames = [ + f'{dataset_name}/{version}/' + f'atl-MMPAll_space-{version}_hemi-{hemi}_deterministic.label.gii' + for hemi in ['L', 'R'] ] + _files = [(f, info['url'], opts) for f in _filenames] - files = [(op.join(dataset_name, version, f), url, opts) for f in filenames] - data = fetch_files(data_dir, files=files, resume=resume, verbose=verbose) + data = fetch_files(data_dir, files=_files, resume=resume, verbose=verbose) return SURFACE(*data) -def fetch_pauli2018(data_dir=None, url=None, resume=True, verbose=1): +def fetch_pauli2018(data_dir=None, resume=True, verbose=1): """ Download files for Pauli et al., 2018 subcortical parcellation. - Parameters - ---------- + This dataset contains + + If you used this data, please cite 1_. 
+ + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Dictionary-like object with keys ['probabilistic', 'deterministic'], + where corresponding values are filepaths to downloaded atlas files. + + Other Parameters + ---------------- data_dir : str, optional Path to use as data directory. If not specified, will check for environmental variable 'NNT_DATA'; if that is not set, will use `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True + Whether to attempt to resume partial download, if possible. Default: True verbose : int, optional Modifies verbosity of download, where higher numbers mean more updates. Default: 1 - Returns - ------- - filenames : :class:`sklearn.utils.Bunch` - Dictionary-like object with keys ['probabilistic', 'deterministic'], - where corresponding values are filepaths to downloaded atlas files. - - References - ---------- - Pauli, W. M., Nili, A. N., & Tyszka, J. M. (2018). A high-resolution - probabilistic in vivo atlas of human subcortical brain nuclei. Scientific - Data, 5, 180063. - Notes ----- License: CC-BY Attribution 4.0 International + + References + ---------- + .. [1] Wolfgang M Pauli, Amanda N Nili, and J Michael Tyszka. A + high-resolution probabilistic in vivo atlas of human subcortical brain + nuclei. Scientific data, 5(1):1\u201313, 2018. """ dataset_name = 'atl-pauli2018' + _get_reference_info(dataset_name, verbose=verbose) + keys = ['probabilistic', 'deterministic', 'info'] data_dir = _get_data_dir(data_dir=data_dir) info = _get_dataset_info(dataset_name) - # format the query how fetch_files() wants things and then download data - files = [ - (i['name'], i['url'], dict(md5sum=i['md5'], move=i['name'])) - for i in info - ] + _files = [] + for _, v in info.items(): + _f = f'{v["folder-name"]}/{v["file-name"]}' + _url = v['url'] + _opts = { + 'md5sum': v['md5'], + 'move': f'{v["folder-name"]}/{v["file-name"]}' + } + _files.append( + (_f, _url, _opts) + ) - data = fetch_files(data_dir, files=files, resume=resume, verbose=verbose) + data = fetch_files(data_dir, files=_files, resume=resume, verbose=verbose) return Bunch(**dict(zip(keys, data))) @@ -357,54 +389,60 @@ def fetch_voneconomo(data_dir=None, url=None, resume=True, verbose=1): """ Fetch von-Economo Koskinas probabilistic FreeSurfer atlas. - Parameters - ---------- + This dataset contains + + If you used this data, please cite 1_. + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Dictionary-like object with keys ['gcs', 'ctab', 'info'] + + Other Parameters + ---------------- data_dir : str, optional Path to use as data directory. If not specified, will check for environmental variable 'NNT_DATA'; if that is not set, will use `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True + Whether to attempt to resume partial download, if possible. Default: True verbose : int, optional Modifies verbosity of download, where higher numbers mean more updates. Default: 1 - Returns - ------- - filenames : :class:`sklearn.utils.Bunch` - Dictionary-like object with keys ['gcs', 'ctab', 'info'] - - References - ---------- - Scholtens, L. H., de Reus, M. A., de Lange, S. C., Schmidt, R., & van den - Heuvel, M. P. (2018). An MRI von Economo–Koskinas atlas. NeuroImage, 170, - 249-256. 
- Notes ----- License: CC-BY-NC-SA 4.0 + + References + ---------- + .. [1] Lianne H Scholtens, Marcel A de Reus, Siemon C de Lange, Ruben + Schmidt, and Martijn P van den Heuvel. An mri von economo\u2013koskinas + atlas. NeuroImage, 170:249\u2013256, 2018. """ dataset_name = 'atl-voneconomo_koskinas' + _get_reference_info(dataset_name, verbose=verbose) + keys = ['gcs', 'ctab', 'info'] data_dir = _get_data_dir(data_dir=data_dir) info = _get_dataset_info(dataset_name) - if url is None: - url = info['url'] opts = { 'uncompress': True, 'md5sum': info['md5'], 'move': '{}.tar.gz'.format(dataset_name) } - filenames = [ - 'atl-vonEconomoKoskinas_hemi-{}_probabilistic.{}'.format(hemi, suff) + + _filenames = [ + f'{dataset_name}/' + f'atl-vonEconomoKoskinas_hemi-{hemi}_probabilistic.{suff}' for hemi in ['L', 'R'] for suff in ['gcs', 'ctab'] - ] + ['atl-vonEconomoKoskinas_info.csv'] - files = [(op.join(dataset_name, f), url, opts) for f in filenames] - data = fetch_files(data_dir, files=files, resume=resume, verbose=verbose) + ] + [ + f'{dataset_name}/atl-vonEconomoKoskinas_info.csv' + ] + _files = [(f, info['url'], opts) for f in _filenames] + data = fetch_files(data_dir, files=_files, resume=resume, verbose=verbose) + data = [SURFACE(*data[:-1:2])] + [SURFACE(*data[1:-1:2])] + [data[-1]] return Bunch(**dict(zip(keys, data))) diff --git a/netneurotools/datasets/fetch_project.py b/netneurotools/datasets/fetch_project.py index 4b382d1..e9a592d 100644 --- a/netneurotools/datasets/fetch_project.py +++ b/netneurotools/datasets/fetch_project.py @@ -1,6 +1,6 @@ """Functions for fetching project data.""" import os -import os.path as op +from pathlib import Path import numpy as np try: @@ -12,64 +12,75 @@ from sklearn.utils import Bunch from .datasets_utils import ( - _get_data_dir, _get_dataset_info + _get_data_dir, _get_dataset_info, _get_reference_info ) from ._mirchi2018 import _get_fc, _get_panas -def fetch_vazquez_rodriguez2019(data_dir=None, url=None, resume=True, - verbose=1): +def fetch_vazquez_rodriguez2019(data_dir=None, resume=True, verbose=1): """ Download files from Vazquez-Rodriguez et al., 2019, PNAS. - Parameters - ---------- + This dataset contains one file: rsquared_gradient.csv, which contains + two columns: rsquared and gradient. + + If you used this data, please cite [1]_. + + Returns + ------- + data : :class:`sklearn.utils.Bunch` + Dictionary-like object with fetched data. + + Other Parameters + ---------------- data_dir : str, optional Path to use as data directory. If not specified, will check for environmental variable 'NNT_DATA'; if that is not set, will use `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True + Whether to attempt to resume partial download, if possible. Default: True verbose : int, optional Modifies verbosity of download, where higher numbers mean more updates. Default: 1 - Returns - ------- - data : :class:`sklearn.utils.Bunch` - Dictionary-like object with keys ['rsquared', 'gradient'] containing - 1000 values from - References ---------- - See `ref` key of returned dictionary object for relevant dataset reference + .. [1] Bertha V\u00e1zquez-Rodr\u00edguez, Laura E Su\u00e1rez, Ross D + Markello, Golia Shafiei, Casey Paquola, Patric Hagmann, Martijn P Van + Den Heuvel, Boris C Bernhardt, R Nathan Spreng, and Bratislav Misic. + Gradients of structure\u2013function tethering across neocortex. 
+ Proceedings of the National Academy of Sciences, + 116(42):21219\u201321227, 2019. """ dataset_name = 'ds-vazquez_rodriguez2019' + _get_reference_info(dataset_name, verbose=verbose) data_dir = _get_data_dir(data_dir=data_dir) info = _get_dataset_info(dataset_name) - if url is None: - url = info['url'] opts = { 'uncompress': True, 'md5sum': info['md5'], 'move': '{}.tar.gz'.format(dataset_name) } - - filenames = [ - op.join(dataset_name, 'rsquared_gradient.csv') - ] - data = fetch_files(data_dir, files=[(f, url, opts) for f in filenames], - resume=resume, verbose=verbose) + fetched = fetch_files( + data_dir, + files=[(dataset_name, info['url'], opts)], + resume=resume, verbose=verbose + ) + fetched = Path(fetched[0]) # load data - rsq, grad = np.loadtxt(data[0], delimiter=',', skiprows=1).T + rsq, grad = np.loadtxt( + fetched / "rsquared_gradient.csv", + delimiter=',', skiprows=1 + ).T + data = { + 'rsquared': rsq, + 'gradient': grad + } - return Bunch(rsquared=rsq, gradient=grad) + return Bunch(**data) def fetch_mirchi2018(data_dir=None, resume=True, verbose=1): @@ -116,9 +127,70 @@ def fetch_mirchi2018(data_dir=None, resume=True, verbose=1): return X, Y -def fetch_hansen_manynetworks(): - """Download files from Hansen et al., 2023, PLOS Biology.""" - pass +def fetch_hansen_manynetworks(data_dir=None, resume=True, verbose=1): + """ + Download files from Hansen et al., 2023, PLOS Biology. + + This dataset contains + + If you used this data, please cite [1]_. + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Dictionary-like object with fetched data. + + Other Parameters + ---------------- + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + resume : bool, optional + Whether to attempt to resume partial download, if possible. Default: True + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + References + ---------- + .. [1] + """ + dataset_name = 'ds-hansen_manynetworks' + _get_reference_info(dataset_name, verbose=verbose) + + data_dir = _get_data_dir(data_dir=data_dir) + info = _get_dataset_info(dataset_name) + opts = { + 'uncompress': True, + 'md5sum': info['md5'], + 'move': f'{dataset_name}/{dataset_name}.tar.gz' + } + # the download info["folder-name"].tar.gz was moved to + # {dataset_name}/{dataset_name}.tar.gz and uncompressed + # to keep the same structure as other datasets + fetched = fetch_files( + data_dir, + files=[(f'{dataset_name}/{info["folder-name"]}', info['url'], opts)], + resume=resume, verbose=verbose + ) + fetched = Path(fetched[0]) + + # load data + data = { + "cammoun033": { + "gene": fetched / "data/Cammoun033/gene_coexpression.npy", + "func": fetched / "data/Cammoun033/func_coactivation.npy", + }, + "schaefer100": { + "gene": fetched / "data/Schaefer100/gene_coexpression.npy", + }, + "schaefer400": { + "gene": fetched / "data/Schaefer400/gene_coexpression.npy", + } + } + + return Bunch(**data) def fetch_hansen_receptors(): @@ -146,28 +218,22 @@ def fetch_suarez_mami(): pass -def fetch_famous_gmat(dataset, data_dir=None, url=None, resume=True, - verbose=1): +def fetch_famous_gmat( + dataset, + data_dir=None, resume=True, verbose=1 + ): """ Download files from multi-species connectomes. 
+ This dataset contains + + If you used this data, please cite celegans [1]_, drosophila [2]_, human + [3]_, macaque_markov [4]_, macaque_modha [5]_, mouse [6]_, rat [7]_. + Parameters ---------- dataset : str - Specifies which dataset to download; must be one of the datasets listed - in :func:`netneurotools.datasets.available_connectomes()`. - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. - Default: 1 + Specifies which dataset to download. Returns ------- @@ -178,46 +244,99 @@ def fetch_famous_gmat(dataset, data_dir=None, url=None, resume=True, Euclidean distances between regions of 'conn'), 'coords' (an array of xyz coordinates for regions of 'conn'), 'acronyms' (an array of acronyms for regions of 'conn'), and 'networks' (an array of network - affiliations for regions of 'conn') + affiliations for regions of 'conn'). + + Other Parameters + ---------------- + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + resume : bool, optional + Whether to attempt to resume partial download, if possible. Default: True + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 References ---------- - See `ref` key of returned dictionary object for relevant dataset reference + .. [1] Lav R Varshney, Beth L Chen, Eric Paniagua, David H Hall, and Dmitri + B Chklovskii. Structural properties of the caenorhabditis elegans + neuronal network. PLoS computational biology, 7(2):e1001066, 2011. + .. [2] Ann-Shyn Chiang, Chih-Yung Lin, Chao-Chun Chuang, Hsiu-Ming Chang, + Chang-Huain Hsieh, Chang-Wei Yeh, Chi-Tin Shih, Jian-Jheng Wu, Guo-Tzau + Wang, Yung-Chang Chen, and others. Three-dimensional reconstruction of + brain-wide wiring networks in drosophila at single-cell resolution. + Current biology, 21(1):1\u201311, 2011. + .. [3] Alessandra Griffa, Yasser Alem\u00e1n-G\u00f3mez, and Patric Hagmann. + Structural and functional connectome from 70 young healthy adults [data + set]. Zenodo, 2019. + .. [4] Nikola T Markov, Maria Ercsey-Ravasz, Camille Lamy, Ana Rita Ribeiro + Gomes, Lo\u00efc Magrou, Pierre Misery, Pascale Giroud, Pascal Barone, + Colette Dehay, Zolt\u00e1n Toroczkai, and others. The role of long-range + connections on the specificity of the macaque interareal cortical + network. Proceedings of the National Academy of Sciences, + 110(13):5187\u20135192, 2013. + .. [5] Dharmendra S Modha and Raghavendra Singh. Network architecture of the + long-distance pathways in the macaque brain. Proceedings of the National + Academy of Sciences, 107(30):13485\u201313490, 2010. + .. [6] Mikail Rubinov, Rolf JF Ypma, Charles Watson, and Edward T Bullmore. + Wiring cost and topological participation of the mouse brain connectome. + Proceedings of the National Academy of Sciences, + 112(32):10032\u201310037, 2015. + .. [7] Mihail Bota, Olaf Sporns, and Larry W Swanson. Architecture of the + cerebral cortical association connectome underlying cognition. 
+ Proceedings of the National Academy of Sciences, + 112(16):E2093\u2013E2101, 2015. """ - available_connectomes = sorted(_get_dataset_info('ds-famous-gmat').keys()) + available_connectomes = [ + 'celegans', + 'drosophila', + 'human_func_scale033', + 'human_func_scale060', + 'human_func_scale125', + 'human_func_scale250', + 'human_func_scale500', + 'human_struct_scale033', + 'human_struct_scale060', + 'human_struct_scale125', + 'human_struct_scale250', + 'human_struct_scale500', + 'macaque_markov', + 'macaque_modha', + 'mouse', + 'rat' + ] if dataset not in available_connectomes: raise ValueError('Provided dataset {} not available; must be one of {}' .format(dataset, available_connectomes)) - dataset_name = 'ds-famous-gmat' + base_dataset_name = 'ds-famous_gmat' + _get_reference_info(base_dataset_name, verbose=verbose) - data_dir = op.join(_get_data_dir(data_dir=data_dir), dataset_name) - info = _get_dataset_info(dataset_name)[dataset] - if url is None: - url = info['url'] + data_dir = _get_data_dir(data_dir=data_dir) + info = _get_dataset_info(base_dataset_name) opts = { 'uncompress': True, 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset) + 'move': '{}.tar.gz'.format(base_dataset_name) } - - filenames = [ - op.join(dataset, '{}.csv'.format(fn)) for fn in info['keys'] - ] + [op.join(dataset, 'ref.txt')] - data = fetch_files(data_dir, files=[(f, url, opts) for f in filenames], - resume=resume, verbose=verbose) - - # load data - for n, arr in enumerate(data[:-1]): + fetched = fetch_files( + data_dir, + files=[(base_dataset_name, info['url'], opts)], + resume=resume, verbose=verbose + ) + fetched = Path(fetched[0]) + + data = {} + for f in (fetched / dataset).glob("*.csv"): try: - data[n] = np.loadtxt(arr, delimiter=',') + data[f.stem] = np.loadtxt(f, delimiter=',') except ValueError: - data[n] = np.loadtxt(arr, delimiter=',', dtype=str) - with open(data[-1]) as src: - data[-1] = src.read().strip() + data[f.stem] = np.loadtxt(f, delimiter=',', dtype=str) - return Bunch(**dict(zip(info['keys'] + ['ref'], data))) + return Bunch(**data) def fetch_neurosynth(): diff --git a/netneurotools/datasets/fetch_template.py b/netneurotools/datasets/fetch_template.py index 232a4c5..1380574 100644 --- a/netneurotools/datasets/fetch_template.py +++ b/netneurotools/datasets/fetch_template.py @@ -2,6 +2,7 @@ import json +from pathlib import Path import os.path as op try: @@ -14,39 +15,57 @@ from .datasets_utils import ( SURFACE, - _get_data_dir, _get_dataset_info, _check_freesurfer_subjid + _get_data_dir, _get_dataset_info, _get_reference_info, _check_freesurfer_subjid ) -def fetch_fsaverage(version='fsaverage', data_dir=None, url=None, resume=True, - verbose=1): +def fetch_fsaverage( + version='fsaverage', + data_dir=None, resume=True, verbose=1 + ): """ Download files for fsaverage FreeSurfer template. + This dataset contains + + If you used this data, please cite 1_, 2_, 3_. + Parameters ---------- version : str, optional One of {'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', 'fsaverage6'}. Default: 'fsaverage' + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Dictionary-like object with keys ['surf'] where corresponding values + are length-2 lists downloaded template files (each list composed of + files for the left and right hemisphere). + + Other Parameters + ---------------- data_dir : str, optional Path to use as data directory. If not specified, will check for environmental variable 'NNT_DATA'; if that is not set, will use `~/nnt-data` instead. 
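An illustrative call to the rewritten fetch_famous_gmat above; keys now come from the CSV stems found in the requested sub-folder, so the former 'ref' entry is no longer returned (assumes network access):

    >>> from netneurotools import datasets
    >>> worm = datasets.fetch_famous_gmat('celegans', verbose=0)
    >>> conn = worm['conn']        # connectivity matrix loaded with np.loadtxt
    >>> labels = worm['labels']    # string array via the dtype=str fallback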
Default: None - url : str, optional - URL from which to download data. Default: None resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True + Whether to attempt to resume partial download, if possible. Default: True verbose : int, optional Modifies verbosity of download, where higher numbers mean more updates. Default: 1 - Returns - ------- - filenames : :class:`sklearn.utils.Bunch` - Dictionary-like object with keys ['surf'] where corresponding values - are length-2 lists downloaded template files (each list composed of - files for the left and right hemisphere). + References + ---------- + .. [1] Anders M Dale, Bruce Fischl, and Martin I Sereno. Cortical + surface-based analysis: i. segmentation and surface reconstruction. + Neuroimage, 9(2):179\u2013194, 1999. + .. [2] Bruce Fischl, Martin I Sereno, and Anders M Dale. Cortical + surface-based analysis: ii: inflation, flattening, and a surface-based + coordinate system. Neuroimage, 9(2):195\u2013207, 1999. + .. [3] Bruce Fischl, Martin I Sereno, Roger BH Tootell, and Anders M Dale. + High-resolution intersubject averaging and a coordinate system for the + cortical surface. Human brain mapping, 8(4):272\u2013284, 1999. """ versions = [ 'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', 'fsaverage6' @@ -56,93 +75,112 @@ def fetch_fsaverage(version='fsaverage', data_dir=None, url=None, resume=True, 'exist. Must be one of {}'.format(version, versions)) dataset_name = 'tpl-fsaverage' + _get_reference_info(dataset_name, verbose=verbose) + keys = ['orig', 'white', 'smoothwm', 'pial', 'inflated', 'sphere'] data_dir = _get_data_dir(data_dir=data_dir) info = _get_dataset_info(dataset_name)[version] - if url is None: - url = info['url'] - opts = { 'uncompress': True, 'md5sum': info['md5'], 'move': '{}.tar.gz'.format(dataset_name) } - filenames = [ - op.join(version, 'surf', '{}.{}'.format(hemi, surf)) + _filenames = [ + f"{version}/surf/{hemi}.{surf}" for surf in keys for hemi in ['lh', 'rh'] ] try: + # use local FreeSurfer data if available data_dir = _check_freesurfer_subjid(version)[1] - data = [op.join(data_dir, f) for f in filenames] + data = [op.join(data_dir, f) for f in _filenames] except FileNotFoundError: - data = fetch_files(data_dir, resume=resume, verbose=verbose, - files=[(op.join(dataset_name, f), url, opts) - for f in filenames]) + _filenames = [f"{dataset_name}/{_}" for _ in _filenames] + _files = [(f, info['url'], opts) for f in _filenames] + data = fetch_files(data_dir, files=_files, resume=resume, verbose=verbose) data = [SURFACE(*data[i:i + 2]) for i in range(0, len(keys) * 2, 2)] return Bunch(**dict(zip(keys, data))) -def fetch_hcp_standards(data_dir=None, url=None, resume=True, verbose=1): +def fetch_hcp_standards(data_dir=None, resume=True, verbose=1): """ Fetch HCP standard mesh atlases for converting between FreeSurfer and HCP. - Parameters - ---------- + This dataset contains + + The original file was from 3_, but is no longer available. The archived + file is available from 4_. + + If you used this data, please cite 1_, 2_. + + Returns + ------- + standards : str + Filepath to standard_mesh_atlases directory + + Other Parameters + ---------------- data_dir : str, optional Path to use as data directory. If not specified, will check for environmental variable 'NNT_DATA'; if that is not set, will use `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. 
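A short sketch of the fsaverage fetcher above; each key maps to a SURFACE namedtuple of left/right hemisphere files, and a local FreeSurfer copy is reused when available (otherwise assumes network access):

    >>> from netneurotools import datasets
    >>> fsavg = datasets.fetch_fsaverage(version='fsaverage5', verbose=0)
    >>> lh_pial, rh_pial = fsavg['pial']   # per-hemisphere surface files
    >>> inflated_lh = fsavg.inflated.lh    # namedtuple field access also works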
Default: None resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True + Whether to attempt to resume partial download, if possible. Default: True verbose : int, optional Modifies verbosity of download, where higher numbers mean more updates. Default: 1 - Returns - ------- - standards : str - Filepath to standard_mesh_atlases directory - - Notes - ----- - Original file from: http://brainvis.wustl.edu/workbench/standard_mesh_atlases.zip - Archived file from: https://web.archive.org/web/20220121035833/http://brainvis.wustl.edu/workbench/standard_mesh_atlases.zip + References + ---------- + .. [1] David C Van Essen, Kamil Ugurbil, Edward Auerbach, Deanna + Barch,Timothy EJ Behrens, Richard Bucholz, Acer Chang, Liyong Chen, + Maurizio Corbetta, Sandra W Curtiss, and others. The human connectome + project: a data acquisition perspective. Neuroimage, + 62(4):2222\u20132231, 2012. + .. [2] Matthew F Glasser, Stamatios N Sotiropoulos, J Anthony Wilson, + Timothy S Coalson, Bruce Fischl, Jesper L Andersson, Junqian Xu, Saad + Jbabdi, Matthew Webster, Jonathan R Polimeni, and others. The minimal + preprocessing pipelines for the human connectome project. Neuroimage, + 80:105\u2013124, 2013. + .. [3] http://brainvis.wustl.edu/workbench/standard_mesh_atlases.zip + .. [4] https://web.archive.org/web/20220121035833/http://brainvis.wustl.edu/workbench/standard_mesh_atlases.zip """ dataset_name = 'tpl-hcp_standards' + _get_reference_info(dataset_name, verbose=verbose) + data_dir = _get_data_dir(data_dir=data_dir) info = _get_dataset_info(dataset_name)["standard_mesh_atlases"] - if url is None: - url = info['url'] opts = { 'uncompress': True, 'md5sum': info['md5'], 'move': '{}.tar.gz'.format(dataset_name) } - filenames = [ - 'L.sphere.32k_fs_LR.surf.gii', 'R.sphere.32k_fs_LR.surf.gii' - ] - files = [(op.join(dataset_name, "standard_mesh_atlases", f), url, opts) - for f in filenames] + fetched = fetch_files( + data_dir, + files=[(f'{dataset_name}/standard_mesh_atlases', info['url'], opts)], + resume=resume, verbose=verbose + ) + fetched = Path(fetched[0]) - fetch_files(data_dir, files=files, resume=resume, verbose=verbose) + return fetched - return op.join(data_dir, dataset_name) - -def fetch_civet(density='41k', version='v1', data_dir=None, url=None, - resume=True, verbose=1): +def fetch_civet( + density='41k', version='v1', + data_dir=None, resume=True, verbose=1 + ): """ Fetch CIVET surface files. + This dataset contains + + If you used this data, please cite 1_, 2_, 3_. + Parameters ---------- density : {'41k', '164k'}, optional @@ -150,18 +188,6 @@ def fetch_civet(density='41k', version='v1', data_dir=None, url=None, high-resolution '164k' surface only exists for version 'v2' version : {'v1, 'v2'}, optional Which version of the CIVET surfaces to use. Default: 'v2' - data_dir : str, optional - Path to use as data directory. If not specified, will check for - environmental variable 'NNT_DATA'; if that is not set, will use - `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True - verbose : int, optional - Modifies verbosity of download, where higher numbers mean more updates. - Default: 1 Returns ------- @@ -170,17 +196,36 @@ def fetch_civet(density='41k', version='v1', data_dir=None, url=None, files for CIVET surface. Note for version 'v1' the 'mid' and 'white' files are identical. 
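fetch_hcp_standards now returns the standard_mesh_atlases directory as a pathlib.Path rather than the parent dataset directory, so callers can glob it directly; a minimal sketch, assuming the archive downloads as before:

    >>> from netneurotools import datasets
    >>> mesh_dir = datasets.fetch_hcp_standards(verbose=0)
    >>> spheres = sorted(mesh_dir.glob('*.sphere.32k_fs_LR.surf.gii'))   # L/R spheres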
- References - ---------- - Y. Ad-Dab’bagh, O. Lyttelton, J.-S. Muehlboeck, C. Lepage, D. Einarson, K. - Mok, O. Ivanov, R. Vincent, J. Lerch, E. Fombonne, A. C. Evans, The CIVET - image-processing environment: A fully automated comprehensive pipeline for - anatomical neuroimaging research. Proceedings of the 12th Annual Meeting of - the Organization for Human Brain Mapping (2006). + Other Parameters + ---------------- + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + resume : bool, optional + Whether to attempt to resume partial download, if possible. Default: True + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 Notes ----- License: https://github.com/aces/CIVET_Full_Project/blob/master/LICENSE + + References + ---------- + .. [1] Oliver Lyttelton, Maxime Boucher, Steven Robbins, and Alan Evans. An + unbiased iterative group registration template for cortical surface + analysis. Neuroimage, 34(4):1535\u20131544, 2007. + .. [2] Vladimir S Fonov, Alan C Evans, Robert C McKinstry, C Robert Almli, + and DL Collins. Unbiased nonlinear average age-appropriate brain + templates from birth to adulthood. NeuroImage, 47:S102, 2009. + .. [3] Y Ad-Dab'bagh, O Lyttelton, J Muehlboeck, C Lepage, D Einarson, K + Mok, O Ivanov, R Vincent, J Lerch, and E Fombonne. The civet + image-processing environment: a fully automated comprehensive pipeline + for anatomical neuroimaging research. proceedings of the 12th annual + meeting of the organization for human brain mapping. Florence, Italy, + pages 2266, 2006. """ densities = ['41k', '164k'] if density not in densities: @@ -196,93 +241,94 @@ def fetch_civet(density='41k', version='v1', data_dir=None, url=None, 'version "v2"') dataset_name = 'tpl-civet' + _get_reference_info(dataset_name, verbose=verbose) + keys = ['mid', 'white'] data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name)[version]['civet{}'.format(density)] - if url is None: - url = info['url'] + info = _get_dataset_info(dataset_name)[version][f'civet{density}'] opts = { 'uncompress': True, 'md5sum': info['md5'], 'move': '{}.tar.gz'.format(dataset_name) } - filenames = [ - op.join(dataset_name, version, 'civet{}'.format(density), - 'tpl-civet_space-ICBM152_hemi-{}_den-{}_{}.obj' - .format(hemi, density, surf)) + + _filenames = [ + f"{dataset_name}/{version}/civet{density}/" + f"tpl-civet_space-ICBM152_hemi-{hemi}_den-{density}_{surf}.obj" for surf in keys for hemi in ['L', 'R'] ] + _files = [(f, info['url'], opts) for f in _filenames] - data = fetch_files(data_dir, resume=resume, verbose=verbose, - files=[(f, url, opts) for f in filenames]) + data = fetch_files(data_dir, files=_files, resume=resume, verbose=verbose) data = [SURFACE(*data[i:i + 2]) for i in range(0, len(keys) * 2, 2)] return Bunch(**dict(zip(keys, data))) -def fetch_conte69(data_dir=None, url=None, resume=True, verbose=1): +def fetch_conte69(data_dir=None, resume=True, verbose=1): """ Download files for Van Essen et al., 2012 Conte69 template. - Parameters - ---------- + This dataset contains + + If you used this data, please cite 1_, 2_. + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Dictionary-like object with keys ['midthickness', 'inflated', + 'vinflated'], where corresponding values are lists of filepaths to + downloaded template files. 
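Usage sketch for the CIVET fetcher above; note that the '164k' density only exists for version 'v2' (assumes network access):

    >>> from netneurotools import datasets
    >>> civet = datasets.fetch_civet(density='41k', version='v2', verbose=0)
    >>> mid_lh, mid_rh = civet['mid']    # ICBM152-space .obj surfaces
    >>> white_lh = civet['white'].lh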
+ + Other Parameters + ---------------- data_dir : str, optional Path to use as data directory. If not specified, will check for environmental variable 'NNT_DATA'; if that is not set, will use `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True + Whether to attempt to resume partial download, if possible. Default: True verbose : int, optional Modifies verbosity of download, where higher numbers mean more updates. Default: 1 - Returns - ------- - filenames : :class:`sklearn.utils.Bunch` - Dictionary-like object with keys ['midthickness', 'inflated', - 'vinflated'], where corresponding values are lists of filepaths to - downloaded template files. - References ---------- - http://brainvis.wustl.edu/wiki/index.php//Caret:Atlases/Conte69_Atlas - - Van Essen, D. C., Glasser, M. F., Dierker, D. L., Harwell, J., & Coalson, - T. (2011). Parcellations and hemispheric asymmetries of human cerebral - cortex analyzed on surface-based atlases. Cerebral cortex, 22(10), - 2241-2262. - - Notes - ----- - License: ??? + .. [1] David C Van Essen, Kamil Ugurbil, Edward Auerbach, Deanna Barch, + Timothy EJ Behrens, Richard Bucholz, Acer Chang, Liyong Chen, Maurizio + Corbetta, Sandra W Curtiss, and others. The human connectome project: a + data acquisition perspective. Neuroimage, 62(4):2222\u20132231, 2012. + .. [2] David C Van Essen, Matthew F Glasser, Donna L Dierker, John Harwell, + and Timothy Coalson. Parcellations and hemispheric asymmetries of human + cerebral cortex analyzed on surface-based atlases. Cerebral cortex, + 22(10):2241\u20132262, 2012. + .. [3] http://brainvis.wustl.edu/wiki/index.php//Caret:Atlases/Conte69_Atlas """ dataset_name = 'tpl-conte69' + _get_reference_info(dataset_name, verbose=verbose) + keys = ['midthickness', 'inflated', 'vinflated'] data_dir = _get_data_dir(data_dir=data_dir) info = _get_dataset_info(dataset_name) - if url is None: - url = info['url'] - opts = { 'uncompress': True, 'md5sum': info['md5'], 'move': '{}.tar.gz'.format(dataset_name) } - filenames = [ - 'tpl-conte69/tpl-conte69_space-MNI305_variant-fsLR32k_{}.{}.surf.gii' - .format(res, hemi) for res in keys for hemi in ['L', 'R'] - ] + ['tpl-conte69/template_description.json'] + _filenames = [ + f"{dataset_name}/tpl-conte69_space-MNI305_variant-fsLR32k_{res}.{hemi}.surf.gii" + for res in keys for hemi in ['L', 'R'] + ] + [ + f"{dataset_name}/template_description.json" + ] + _files = [(f, info['url'], opts) for f in _filenames] - data = fetch_files(data_dir, files=[(f, url, opts) for f in filenames], - resume=resume, verbose=verbose) + data = fetch_files(data_dir, files=_files, resume=resume, verbose=verbose) with open(data[-1], 'r') as src: data[-1] = json.load(src) @@ -293,67 +339,63 @@ def fetch_conte69(data_dir=None, url=None, resume=True, verbose=1): return Bunch(**dict(zip(keys + ['info'], data))) -def fetch_yerkes19(data_dir=None, url=None, resume=None, verbose=1): +def fetch_yerkes19(data_dir=None, resume=None, verbose=1): """ Download files for Donahue et al., 2016 Yerkes19 template. - Parameters - ---------- + This dataset contains + + If you used this data, please cite 1_. + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Dictionary-like object with keys ['midthickness', 'inflated', + 'vinflated'], where corresponding values are lists of filepaths to + downloaded template files. 
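A brief sketch of the Conte69 fetcher above; the extra 'info' entry is parsed from template_description.json (assumes network access):

    >>> from netneurotools import datasets
    >>> conte = datasets.fetch_conte69(verbose=0)
    >>> midthickness = conte['midthickness']   # pair of fsLR-32k GIFTI surfaces
    >>> description = conte['info']            # parsed template_description.json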
+ + Other Parameters + ---------------- data_dir : str, optional Path to use as data directory. If not specified, will check for environmental variable 'NNT_DATA'; if that is not set, will use `~/nnt-data` instead. Default: None - url : str, optional - URL from which to download data. Default: None resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: - True + Whether to attempt to resume partial download, if possible. Default: True verbose : int, optional Modifies verbosity of download, where higher numbers mean more updates. Default: 1 - Returns - ------- - filenames : :class:`sklearn.utils.Bunch` - Dictionary-like object with keys ['midthickness', 'inflated', - 'vinflated'], where corresponding values are lists of filepaths to - downloaded template files. - References ---------- - https://balsa.wustl.edu/reference/show/976nz - - Donahue, C. J., Sotiropoulos, S. N., Jbabdi, S., Hernandez-Fernandez, M., - Behrens, T. E., Dyrby, T. B., ... & Glasser, M. F. (2016). Using diffusion - tractography to predict cortical connection strength and distance: a - quantitative comparison with tracers in the monkey. Journal of - Neuroscience, 36(25), 6758-6770. - - Notes - ----- - License: ??? + .. [1] Chad J Donahue, Stamatios N Sotiropoulos, Saad Jbabdi, Moises + Hernandez-Fernandez, Timothy E Behrens, Tim B Dyrby, Timothy Coalson, + Henry Kennedy, Kenneth Knoblauch, David C Van Essen, and others. Using + diffusion tractography to predict cortical connection strength and + distance: a quantitative comparison with tracers in the monkey. Journal + of Neuroscience, 36(25):6758\u20136770, 2016. + .. [2] https://balsa.wustl.edu/reference/show/976nz """ dataset_name = 'tpl-yerkes19' + _get_reference_info(dataset_name, verbose=verbose) + keys = ['midthickness', 'inflated', 'vinflated'] data_dir = _get_data_dir(data_dir=data_dir) info = _get_dataset_info(dataset_name) - if url is None: - url = info['url'] - opts = { 'uncompress': True, 'md5sum': info['md5'], 'move': '{}.tar.gz'.format(dataset_name) } + _filenames = [ + f"{dataset_name}/tpl-yerkes19_space-fsLR32k_{res}.{hemi}.surf.gii" + for res in keys for hemi in ['L', 'R'] - filenames = [ - 'tpl-yerkes19/tpl-yerkes19_space-fsLR32k_{}.{}.surf.gii' - .format(res, hemi) for res in keys for hemi in ['L', 'R'] ] + _files = [(f, info['url'], opts) for f in _filenames] - data = fetch_files(data_dir, files=[(f, url, opts) for f in filenames], - resume=resume, verbose=verbose) + data = fetch_files(data_dir, files=_files, resume=resume, verbose=verbose) # bundle hemispheres together data = [SURFACE(*data[i:i + 2]) for i in range(0, 6, 2)] diff --git a/netneurotools/datasets/netneurotools.bib b/netneurotools/datasets/netneurotools.bib new file mode 100644 index 0000000..1e78958 --- /dev/null +++ b/netneurotools/datasets/netneurotools.bib @@ -0,0 +1,253 @@ +@article{cammoun2012mapping, + title={Mapping the human connectome at multiple scales with diffusion spectrum MRI}, + author={Cammoun, Leila and Gigandet, Xavier and Meskaldji, Djalel and Thiran, Jean Philippe and Sporns, Olaf and Do, Kim Q and Maeder, Philippe and Meuli, Reto and Hagmann, Patric}, + journal={Journal of neuroscience methods}, + volume={203}, + number={2}, + pages={386--397}, + year={2012}, + publisher={Elsevier} +} + +@article{pauli2018high, + title={A high-resolution probabilistic in vivo atlas of human subcortical brain nuclei}, + author={Pauli, Wolfgang M and Nili, Amanda N and Tyszka, J Michael}, + journal={Scientific data}, + volume={5}, + number={1}, + 
pages={1--13}, + year={2018}, + publisher={Nature Publishing Group} +} + +@article{van2012human, + title={The Human Connectome Project: a data acquisition perspective}, + author={Van Essen, David C and Ugurbil, Kamil and Auerbach, Edward and Barch, Deanna and Behrens, Timothy EJ and Bucholz, Richard and Chang, Acer and Chen, Liyong and Corbetta, Maurizio and Curtiss, Sandra W and others}, + journal={Neuroimage}, + volume={62}, + number={4}, + pages={2222--2231}, + year={2012}, + publisher={Elsevier} +} + +@article{van2012parcellations, + title={Parcellations and hemispheric asymmetries of human cerebral cortex analyzed on surface-based atlases}, + author={Van Essen, David C and Glasser, Matthew F and Dierker, Donna L and Harwell, John and Coalson, Timothy}, + journal={Cerebral cortex}, + volume={22}, + number={10}, + pages={2241--2262}, + year={2012}, + publisher={Oxford University Press} +} + +@article{glasser2013minimal, + title={The minimal preprocessing pipelines for the Human Connectome Project}, + author={Glasser, Matthew F and Sotiropoulos, Stamatios N and Wilson, J Anthony and Coalson, Timothy S and Fischl, Bruce and Andersson, Jesper L and Xu, Junqian and Jbabdi, Saad and Webster, Matthew and Polimeni, Jonathan R and others}, + journal={Neuroimage}, + volume={80}, + pages={105--124}, + year={2013}, + publisher={Elsevier} +} + +@article{donahue2016using, + title={Using diffusion tractography to predict cortical connection strength and distance: a quantitative comparison with tracers in the monkey}, + author={Donahue, Chad J and Sotiropoulos, Stamatios N and Jbabdi, Saad and Hernandez-Fernandez, Moises and Behrens, Timothy E and Dyrby, Tim B and Coalson, Timothy and Kennedy, Henry and Knoblauch, Kenneth and Van Essen, David C and others}, + journal={Journal of Neuroscience}, + volume={36}, + number={25}, + pages={6758--6770}, + year={2016}, + publisher={Soc Neuroscience} +} + +@article{dale1999cortical, + title={Cortical surface-based analysis: I. 
Segmentation and surface reconstruction}, + author={Dale, Anders M and Fischl, Bruce and Sereno, Martin I}, + journal={Neuroimage}, + volume={9}, + number={2}, + pages={179--194}, + year={1999}, + publisher={Elsevier} +} + +@article{fischl1999cortical, + title={Cortical surface-based analysis: II: inflation, flattening, and a surface-based coordinate system}, + author={Fischl, Bruce and Sereno, Martin I and Dale, Anders M}, + journal={Neuroimage}, + volume={9}, + number={2}, + pages={195--207}, + year={1999}, + publisher={Elsevier} +} + +@article{fischl1999high, + title={High-resolution intersubject averaging and a coordinate system for the cortical surface}, + author={Fischl, Bruce and Sereno, Martin I and Tootell, Roger BH and Dale, Anders M}, + journal={Human brain mapping}, + volume={8}, + number={4}, + pages={272--284}, + year={1999}, + publisher={Wiley Online Library} +} + +@article{lyttelton2007unbiased, + title={An unbiased iterative group registration template for cortical surface analysis}, + author={Lyttelton, Oliver and Boucher, Maxime and Robbins, Steven and Evans, Alan}, + journal={Neuroimage}, + volume={34}, + number={4}, + pages={1535--1544}, + year={2007}, + publisher={Elsevier} +} + +@article{fonov2009unbiased, + title={Unbiased nonlinear average age-appropriate brain templates from birth to adulthood}, + author={Fonov, Vladimir S and Evans, Alan C and McKinstry, Robert C and Almli, C Robert and Collins, DL}, + journal={NeuroImage}, + volume={47}, + pages={S102}, + year={2009}, + publisher={Elsevier} +} + +@article{ad2006civet, + title={The CIVET image-processing environment: A fully automated comprehensive pipeline for anatomical neuroimaging research. Proceedings of the 12th annual meeting of the organization for human brain mapping}, + author={Ad-Dab'bagh, Y and Lyttelton, O and Muehlboeck, J and Lepage, C and Einarson, D and Mok, K and Ivanov, O and Vincent, R and Lerch, J and Fombonne, E}, + journal={Florence, Italy}, + pages={2266}, + year={2006} +} + +@article{varshney2011structural, + title={Structural properties of the Caenorhabditis elegans neuronal network}, + author={Varshney, Lav R and Chen, Beth L and Paniagua, Eric and Hall, David H and Chklovskii, Dmitri B}, + journal={PLoS computational biology}, + volume={7}, + number={2}, + pages={e1001066}, + year={2011}, + publisher={Public Library of Science San Francisco, USA} +} + +@article{chiang2011three, + title={Three-dimensional reconstruction of brain-wide wiring networks in Drosophila at single-cell resolution}, + author={Chiang, Ann-Shyn and Lin, Chih-Yung and Chuang, Chao-Chun and Chang, Hsiu-Ming and Hsieh, Chang-Huain and Yeh, Chang-Wei and Shih, Chi-Tin and Wu, Jian-Jheng and Wang, Guo-Tzau and Chen, Yung-Chang and others}, + journal={Current biology}, + volume={21}, + number={1}, + pages={1--11}, + year={2011}, + publisher={Elsevier} +} + +@article{griffa2019lausanne, + title={Structural and functional connectome from 70 young healthy adults [data set]}, + author={Griffa, Alessandra and Alem{\'a}n-G{\'o}mez, Yasser and Hagmann, Patric}, + journal={Zenodo}, + year={2019} +} + +@article{markov2013role, + title={The role of long-range connections on the specificity of the macaque interareal cortical network}, + author={Markov, Nikola T and Ercsey-Ravasz, Maria and Lamy, Camille and Ribeiro Gomes, Ana Rita and Magrou, Lo{\"\i}c and Misery, Pierre and Giroud, Pascale and Barone, Pascal and Dehay, Colette and Toroczkai, Zolt{\'a}n and others}, + journal={Proceedings of the National Academy of Sciences}, + 
volume={110}, + number={13}, + pages={5187--5192}, + year={2013}, + publisher={National Acad Sciences} +} + +@article{modha2010network, + title={Network architecture of the long-distance pathways in the macaque brain}, + author={Modha, Dharmendra S and Singh, Raghavendra}, + journal={Proceedings of the National Academy of Sciences}, + volume={107}, + number={30}, + pages={13485--13490}, + year={2010}, + publisher={National Acad Sciences} +} + +@article{rubinov2015wiring, + title={Wiring cost and topological participation of the mouse brain connectome}, + author={Rubinov, Mikail and Ypma, Rolf JF and Watson, Charles and Bullmore, Edward T}, + journal={Proceedings of the National Academy of Sciences}, + volume={112}, + number={32}, + pages={10032--10037}, + year={2015}, + publisher={National Acad Sciences} +} + +@article{bota2015architecture, + title={Architecture of the cerebral cortical association connectome underlying cognition}, + author={Bota, Mihail and Sporns, Olaf and Swanson, Larry W}, + journal={Proceedings of the National Academy of Sciences}, + volume={112}, + number={16}, + pages={E2093--E2101}, + year={2015}, + publisher={National Acad Sciences} +} + +@article{vazquez2019gradients, + title={Gradients of structure--function tethering across neocortex}, + author={V{\'a}zquez-Rodr{\'\i}guez, Bertha and Su{\'a}rez, Laura E and Markello, Ross D and Shafiei, Golia and Paquola, Casey and Hagmann, Patric and Van Den Heuvel, Martijn P and Bernhardt, Boris C and Spreng, R Nathan and Misic, Bratislav}, + journal={Proceedings of the National Academy of Sciences}, + volume={116}, + number={42}, + pages={21219--21227}, + year={2019}, + publisher={National Acad Sciences} +} + +@article{hansen2023integrating, + title={Integrating multimodal and multiscale connectivity blueprints of the human cerebral cortex in health and disease}, + author={Hansen, Justine Y and Shafiei, Golia and Voigt, Katharina and Liang, Emma X and Cox, Sylvia ML and Leyton, Marco and Jamadar, Sharna D and Misic, Bratislav}, + journal={PLoS biology}, + volume={21}, + number={9}, + pages={e3002314}, + year={2023}, + publisher={Public Library of Science San Francisco, CA USA} +} + +@article{schaefer2018local, + title={Local-global parcellation of the human cerebral cortex from intrinsic functional connectivity MRI}, + author={Schaefer, Alexander and Kong, Ru and Gordon, Evan M and Laumann, Timothy O and Zuo, Xi-Nian and Holmes, Avram J and Eickhoff, Simon B and Yeo, BT Thomas}, + journal={Cerebral cortex}, + volume={28}, + number={9}, + pages={3095--3114}, + year={2018}, + publisher={Oxford University Press} +} + +@article{glasser2016multi, + title={A multi-modal parcellation of human cerebral cortex}, + author={Glasser, Matthew F and Coalson, Timothy S and Robinson, Emma C and Hacker, Carl D and Harwell, John and Yacoub, Essa and Ugurbil, Kamil and Andersson, Jesper and Beckmann, Christian F and Jenkinson, Mark and others}, + journal={Nature}, + volume={536}, + number={7615}, + pages={171--178}, + year={2016}, + publisher={Nature Publishing Group} +} + +@article{scholtens2018mri, + title={An mri von economo--koskinas atlas}, + author={Scholtens, Lianne H and de Reus, Marcel A and de Lange, Siemon C and Schmidt, Ruben and van den Heuvel, Martijn P}, + journal={NeuroImage}, + volume={170}, + pages={249--256}, + year={2018}, + publisher={Elsevier} +} diff --git a/netneurotools/datasets/references.json b/netneurotools/datasets/references.json new file mode 100644 index 0000000..b5214a9 --- /dev/null +++ 
b/netneurotools/datasets/references.json @@ -0,0 +1,220 @@ +{ + "atl-cammoun2012": { + "primary": [ + { + "citation": "Leila Cammoun, Xavier Gigandet, Djalel Meskaldji, Jean Philippe Thiran, Olaf Sporns, Kim Q Do, Philippe Maeder, Reto Meuli, and Patric Hagmann. Mapping the human connectome at multiple scales with diffusion spectrum mri. Journal of neuroscience methods, 203(2):386\u2013397, 2012.", + "bibkey": "cammoun2012mapping" + } + ] + }, + "atl-pauli2018": { + "primary": [ + { + "citation": "Wolfgang M Pauli, Amanda N Nili, and J Michael Tyszka. A high-resolution probabilistic in vivo atlas of human subcortical brain nuclei. Scientific data, 5(1):1\u201313, 2018.", + "bibkey": "pauli2018high" + } + ] + }, + "tpl-conte69": { + "primary": [ + { + "citation": "David C Van Essen, Kamil Ugurbil, Edward Auerbach, Deanna Barch, Timothy EJ Behrens, Richard Bucholz, Acer Chang, Liyong Chen, Maurizio Corbetta, Sandra W Curtiss, and others. The human connectome project: a data acquisition perspective. Neuroimage, 62(4):2222\u20132231, 2012.", + "bibkey": "van2012human" + }, + { + "citation": "David C Van Essen, Matthew F Glasser, Donna L Dierker, John Harwell, and Timothy Coalson. Parcellations and hemispheric asymmetries of human cerebral cortex analyzed on surface-based atlases. Cerebral cortex, 22(10):2241\u20132262, 2012.", + "bibkey": "van2012parcellations" + } + ] + }, + "tpl-yerkes19": { + "primary": [ + { + "citation": "Chad J Donahue, Stamatios N Sotiropoulos, Saad Jbabdi, Moises Hernandez-Fernandez, Timothy E Behrens, Tim B Dyrby, Timothy Coalson, Henry Kennedy, Kenneth Knoblauch, David C Van Essen, and others. Using diffusion tractography to predict cortical connection strength and distance: a quantitative comparison with tracers in the monkey. Journal of Neuroscience, 36(25):6758\u20136770, 2016.", + "bibkey": "donahue2016using" + } + ] + }, + "tpl-fsaverage": { + "primary": [ + { + "citation": "Anders M Dale, Bruce Fischl, and Martin I Sereno. Cortical surface-based analysis: i. segmentation and surface reconstruction. Neuroimage, 9(2):179\u2013194, 1999.", + "bibkey": "dale1999cortical" + }, + { + "citation": "Bruce Fischl, Martin I Sereno, and Anders M Dale. Cortical surface-based analysis: ii: inflation, flattening, and a surface-based coordinate system. Neuroimage, 9(2):195\u2013207, 1999.", + "bibkey": "fischl1999cortical" + }, + { + "citation": "Bruce Fischl, Martin I Sereno, Roger BH Tootell, and Anders M Dale. High-resolution intersubject averaging and a coordinate system for the cortical surface. Human brain mapping, 8(4):272\u2013284, 1999.", + "bibkey": "fischl1999high" + } + ] + }, + "tpl-civet": { + "primary": [ + { + "citation": "Oliver Lyttelton, Maxime Boucher, Steven Robbins, and Alan Evans. An unbiased iterative group registration template for cortical surface analysis. Neuroimage, 34(4):1535\u20131544, 2007.", + "bibkey": "lyttelton2007unbiased" + }, + { + "citation": "Vladimir S Fonov, Alan C Evans, Robert C McKinstry, C Robert Almli, and DL Collins. Unbiased nonlinear average age-appropriate brain templates from birth to adulthood. NeuroImage, 47:S102, 2009.", + "bibkey": "fonov2009unbiased" + }, + { + "citation": "Y Ad-Dab'bagh, O Lyttelton, J Muehlboeck, C Lepage, D Einarson, K Mok, O Ivanov, R Vincent, J Lerch, and E Fombonne. The civet image-processing environment: a fully automated comprehensive pipeline for anatomical neuroimaging research. proceedings of the 12th annual meeting of the organization for human brain mapping. 
Florence, Italy, pages 2266, 2006.", + "bibkey": "ad2006civet" + } + ] + }, + "ds-famous_gmat": { + "primary": [ + { + "citation": "", + "bibkey": "" + } + ], + "celegans": [ + { + "citation": "Lav R Varshney, Beth L Chen, Eric Paniagua, David H Hall, and Dmitri B Chklovskii. Structural properties of the caenorhabditis elegans neuronal network. PLoS computational biology, 7(2):e1001066, 2011.", + "bibkey": "varshney2011structural" + } + ], + "drosophila": [ + { + "citation": "Ann-Shyn Chiang, Chih-Yung Lin, Chao-Chun Chuang, Hsiu-Ming Chang, Chang-Huain Hsieh, Chang-Wei Yeh, Chi-Tin Shih, Jian-Jheng Wu, Guo-Tzau Wang, Yung-Chang Chen, and others. Three-dimensional reconstruction of brain-wide wiring networks in drosophila at single-cell resolution. Current biology, 21(1):1\u201311, 2011.", + "bibkey": "chiang2011three" + } + ], + "human": [ + { + "citation": "Alessandra Griffa, Yasser Alem\u00e1n-G\u00f3mez, and Patric Hagmann. Structural and functional connectome from 70 young healthy adults [data set]. Zenodo, 2019.", + "bibkey": "griffa2019lausanne" + } + ], + "macaque_markov": [ + { + "citation": "Nikola T Markov, Maria Ercsey-Ravasz, Camille Lamy, Ana Rita Ribeiro Gomes, Lo\u00efc Magrou, Pierre Misery, Pascale Giroud, Pascal Barone, Colette Dehay, Zolt\u00e1n Toroczkai, and others. The role of long-range connections on the specificity of the macaque interareal cortical network. Proceedings of the National Academy of Sciences, 110(13):5187\u20135192, 2013.", + "bibkey": "markov2013role" + } + ], + "macaque_modha": [ + { + "citation": "Dharmendra S Modha and Raghavendra Singh. Network architecture of the long-distance pathways in the macaque brain. Proceedings of the National Academy of Sciences, 107(30):13485\u201313490, 2010.", + "bibkey": "modha2010network" + } + ], + "mouse": [ + { + "citation": "Mikail Rubinov, Rolf JF Ypma, Charles Watson, and Edward T Bullmore. Wiring cost and topological participation of the mouse brain connectome. Proceedings of the National Academy of Sciences, 112(32):10032\u201310037, 2015.", + "bibkey": "rubinov2015wiring" + } + ], + "rat": [ + { + "citation": "Mihail Bota, Olaf Sporns, and Larry W Swanson. Architecture of the cerebral cortical association connectome underlying cognition. Proceedings of the National Academy of Sciences, 112(16):E2093\u2013E2101, 2015.", + "bibkey": "bota2015architecture" + } + ] + }, + "ds-vazquez_rodriguez2019": { + "primary": [ + { + "citation": "Bertha V\u00e1zquez-Rodr\u00edguez, Laura E Su\u00e1rez, Ross D Markello, Golia Shafiei, Casey Paquola, Patric Hagmann, Martijn P Van Den Heuvel, Boris C Bernhardt, R Nathan Spreng, and Bratislav Misic. Gradients of structure\u2013function tethering across neocortex. Proceedings of the National Academy of Sciences, 116(42):21219\u201321227, 2019.", + "bibkey": "vazquez2019gradients" + } + ] + }, + "atl-schaefer2018": { + "primary": [ + { + "citation": "Alexander Schaefer, Ru Kong, Evan M Gordon, Timothy O Laumann, Xi-Nian Zuo, Avram J Holmes, Simon B Eickhoff, and BT Thomas Yeo. Local-global parcellation of the human cerebral cortex from intrinsic functional connectivity mri. Cerebral cortex, 28(9):3095\u20133114, 2018.", + "bibkey": "schaefer2018local" + } + ] + }, + "atl-mmpall": { + "primary": [ + { + "citation": "Matthew F Glasser, Timothy S Coalson, Emma C Robinson, Carl D Hacker, John Harwell, Essa Yacoub, Kamil Ugurbil, Jesper Andersson, Christian F Beckmann, Mark Jenkinson, and others. A multi-modal parcellation of human cerebral cortex. 
Nature, 536(7615):171\u2013178, 2016.", + "bibkey": "glasser2016multi" + } + ] + }, + "atl-voneconomo_koskinas": { + "primary": [ + { + "citation": "Lianne H Scholtens, Marcel A de Reus, Siemon C de Lange, Ruben Schmidt, and Martijn P van den Heuvel. An mri von economo\u2013koskinas atlas. NeuroImage, 170:249\u2013256, 2018.", + "bibkey": "scholtens2018mri" + } + ] + }, + "tpl-hcp_standards": { + "primary": [ + { + "citation": "David C Van Essen, Kamil Ugurbil, Edward Auerbach, Deanna Barch, Timothy EJ Behrens, Richard Bucholz, Acer Chang, Liyong Chen, Maurizio Corbetta, Sandra W Curtiss, and others. The human connectome project: a data acquisition perspective. Neuroimage, 62(4):2222\u20132231, 2012.", + "bibkey": "van2012human" + }, + { + "citation": "Matthew F Glasser, Stamatios N Sotiropoulos, J Anthony Wilson, Timothy S Coalson, Bruce Fischl, Jesper L Andersson, Junqian Xu, Saad Jbabdi, Matthew Webster, Jonathan R Polimeni, and others. The minimal preprocessing pipelines for the human connectome project. Neuroimage, 80:105\u2013124, 2013.", + "bibkey": "glasser2013minimal" + } + ] + }, + "ds-hansen_manynetworks": { + "primary": [ + { + "citation": "Justine Y Hansen, Golia Shafiei, Katharina Voigt, Emma X Liang, Sylvia ML Cox, Marco Leyton, Sharna D Jamadar, and Bratislav Misic. Integrating multimodal and multiscale connectivity blueprints of the human cerebral cortex in health and disease. PLoS biology, 21(9):e3002314, 2023.", + "bibkey": "hansen2023integrating" + } + ], + "gene": [ + { + "citation": "", + "bibkey": "" + } + ], + "receptor": [ + { + "citation": "", + "bibkey": "" + } + ], + "larminar": [ + { + "citation": "", + "bibkey": "" + } + ], + "metabolic": [ + { + "citation": "", + "bibkey": "" + } + ], + "haemodynamic": [ + { + "citation": "", + "bibkey": "" + } + ], + "electrophysiological": [ + { + "citation": "", + "bibkey": "" + } + ], + "temporal": [ + { + "citation": "", + "bibkey": "" + } + ], + "cognitive": [ + { + "citation": "", + "bibkey": "" + } + ] + } +} \ No newline at end of file diff --git a/netneurotools/datasets/tests/test_fetch.py b/netneurotools/datasets/tests/test_fetch.py index 02a26c3..20f7301 100644 --- a/netneurotools/datasets/tests/test_fetch.py +++ b/netneurotools/datasets/tests/test_fetch.py @@ -1,155 +1,206 @@ """For testing netneurotools.datasets.fetch_* functionality.""" import os import pytest +from pathlib import Path import numpy as np from netneurotools import datasets -@pytest.mark.parametrize('version', [ - 'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', 'fsaverage6' -]) -def test_fetch_fsaverage(tmpdir, version): - """Test fetching of fsaverage surfaces.""" - fsaverage = datasets.fetch_fsaverage(version=version, data_dir=tmpdir, - verbose=0) - assert all(hasattr(fsaverage, k) - and len(fsaverage[k]) == 2 - and all(os.path.isfile(hemi) - for hemi in fsaverage[k]) for k in - ['orig', 'white', 'smoothwm', 'pial', 'inflated', 'sphere']) - - -def test_fetch_hcp_standards(tmpdir): - """Test fetching of HCP standard meshes.""" - hcp = datasets.fetch_hcp_standards(data_dir=tmpdir, verbose=1) - assert os.path.isdir(hcp) - - -@pytest.mark.parametrize('version', [ - 'v1', 'v2' -]) -def test_fetch_civet(tmpdir, version): - """Test fetching of CIVET templates.""" - civet = datasets.fetch_civet(version=version, data_dir=tmpdir, verbose=0) - for key in ('mid', 'white'): - assert key in civet - for hemi in ('lh', 'rh'): - assert hasattr(civet[key], hemi) - assert os.path.isfile(getattr(civet[key], hemi)) - - -def test_fetch_conte69(tmpdir): - 
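The new references.json (backed by netneurotools.bib) is what _get_reference_info consults when a fetcher reports its citations. A hypothetical lookup, assuming the verbose/return_dict keywords behave as shown in this patch:

    >>> from netneurotools.datasets.datasets_utils import _get_reference_info
    >>> refs = _get_reference_info('tpl-fsaverage', verbose=0, return_dict=True)
    >>> # expected to expose the 'primary' entries listed above, i.e. the
    >>> # dale1999cortical / fischl1999cortical / fischl1999high bibkeys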
"""Test fetching of Conte69 surfaces.""" - conte = datasets.fetch_conte69(data_dir=tmpdir, verbose=0) - assert all(hasattr(conte, k) for k in - ['midthickness', 'inflated', 'vinflated', 'info']) - - -def test_fetch_yerkes19(tmpdir): - """Test fetching of Yerkes19 surfaces.""" - conte = datasets.fetch_yerkes19(data_dir=tmpdir, verbose=0) - assert all(hasattr(conte, k) for k in - ['midthickness', 'inflated', 'vinflated']) - - -@pytest.mark.parametrize('version, expected', [ - ('MNI152NLin2009aSym', [1, 1, 1, 1, 1]), - ('fsaverage', [2, 2, 2, 2, 2]), - ('fsaverage5', [2, 2, 2, 2, 2]), - ('fsaverage6', [2, 2, 2, 2, 2]), - ('fslr32k', [2, 2, 2, 2, 2]), - ('gcs', [2, 2, 2, 2, 6]) -]) -def test_fetch_cammoun2012(tmpdir, version, expected): - """Test fetching of Cammoun2012 parcellations.""" - keys = ['scale033', 'scale060', 'scale125', 'scale250', 'scale500'] - cammoun = datasets.fetch_cammoun2012(version, data_dir=tmpdir, verbose=0) - - # output has expected keys - assert all(hasattr(cammoun, k) for k in keys) - # and keys are expected lengths! - for k, e in zip(keys, expected): - out = getattr(cammoun, k) - if isinstance(out, (tuple, list)): - assert len(out) == e +class TestFetchTemplate: + """Test fetching of template datasets.""" + + @pytest.mark.parametrize('version', [ + 'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', 'fsaverage6' + ]) + def test_fetch_fsaverage(self, tmpdir, version): + """Test fetching of fsaverage surfaces.""" + fsaverage = datasets.fetch_fsaverage( + version=version, data_dir=tmpdir, verbose=0 + ) + for k in ['orig', 'white', 'smoothwm', 'pial', 'inflated', 'sphere']: + assert hasattr(fsaverage, k) + assert len(fsaverage[k]) == 2 + assert all(os.path.isfile(hemi) for hemi in fsaverage[k]), fsaverage[k] + + def test_fetch_hcp_standards(self, tmpdir): + """Test fetching of HCP standard meshes.""" + hcp = datasets.fetch_hcp_standards(data_dir=tmpdir, verbose=0) + assert hcp.exists() + + @pytest.mark.parametrize('version', [ + 'v1', 'v2' + ]) + def test_fetch_civet(self, tmpdir, version): + """Test fetching of CIVET templates.""" + civet = datasets.fetch_civet(version=version, data_dir=tmpdir, verbose=0) + for key in ('mid', 'white'): + assert key in civet + for hemi in ('lh', 'rh'): + assert hasattr(civet[key], hemi) + assert os.path.isfile(getattr(civet[key], hemi)) + + def test_fetch_conte69(self, tmpdir): + """Test fetching of Conte69 surfaces.""" + conte = datasets.fetch_conte69(data_dir=tmpdir, verbose=0) + assert all(hasattr(conte, k) for k in + ['midthickness', 'inflated', 'vinflated', 'info']) + + def test_fetch_yerkes19(self, tmpdir): + """Test fetching of Yerkes19 surfaces.""" + yerkes19 = datasets.fetch_yerkes19(data_dir=tmpdir, verbose=0) + assert all(hasattr(yerkes19, k) for k in + ['midthickness', 'inflated', 'vinflated']) + + +class TestFetchAtlas: + """Test fetching of atlas datasets.""" + + @pytest.mark.parametrize('version, expected', [ + ('MNI152NLin2009aSym', [1, 1, 1, 1, 1]), + ('fsaverage', [2, 2, 2, 2, 2]), + ('fsaverage5', [2, 2, 2, 2, 2]), + ('fsaverage6', [2, 2, 2, 2, 2]), + ('fslr32k', [2, 2, 2, 2, 2]), + ('gcs', [2, 2, 2, 2, 6]) + ]) + def test_fetch_cammoun2012(self, tmpdir, version, expected): + """Test fetching of Cammoun2012 parcellations.""" + keys = ['scale033', 'scale060', 'scale125', 'scale250', 'scale500'] + cammoun = datasets.fetch_cammoun2012(version, data_dir=tmpdir, verbose=0) + + # output has expected keys + assert all(hasattr(cammoun, k) for k in keys) + # and keys are expected lengths! 
+ for k, e in zip(keys, expected): + out = getattr(cammoun, k) + if isinstance(out, (tuple, list)): + assert len(out) == e + else: + assert isinstance(out, str) and out.endswith('.nii.gz') + + if 'fsaverage' in version: + with pytest.warns(DeprecationWarning): + datasets.fetch_cammoun2012('surface', data_dir=tmpdir, verbose=0) + + @pytest.mark.parametrize('version', [ + 'fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k' + ]) + def test_fetch_schaefer2018(self, tmpdir, version): + """Test fetching of Schaefer2018 parcellations.""" + keys = [ + '{}Parcels{}Networks'.format(p, n) + for p in range(100, 1001, 100) for n in [7, 17] + ] + schaefer = datasets.fetch_schaefer2018(version, data_dir=tmpdir, verbose=0) + + if version == 'fslr32k': + assert all(k in schaefer and os.path.isfile(schaefer[k]) for k in keys) else: - assert isinstance(out, str) and out.endswith('.nii.gz') - - if 'fsaverage' in version: - with pytest.warns(DeprecationWarning): - datasets.fetch_cammoun2012('surface', data_dir=tmpdir, verbose=0) - - -@pytest.mark.parametrize('version', [ - 'fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k' -]) -def test_fetch_schaefer2018(tmpdir, version): - """Test fetching of Schaefer2018 parcellations.""" - keys = [ - '{}Parcels{}Networks'.format(p, n) - for p in range(100, 1001, 100) for n in [7, 17] - ] - schaefer = datasets.fetch_schaefer2018(version, data_dir=tmpdir, verbose=0) - - if version == 'fslr32k': - assert all(k in schaefer and os.path.isfile(schaefer[k]) for k in keys) - else: - assert all(k in schaefer - and len(schaefer[k]) == 2 - and all(os.path.isfile(hemi) for hemi in schaefer[k]) - for k in keys) - - -def test_fetch_mmpall(tmpdir): - """Test fetching of MMPAll parcellations.""" - mmp = datasets.fetch_mmpall(data_dir=tmpdir, verbose=0) - assert len(mmp) == 2 - assert all(os.path.isfile(hemi) for hemi in mmp) - assert all(hasattr(mmp, attr) for attr in ('lh', 'rh')) - - -def test_fetch_pauli2018(tmpdir): - """Test fetching of Pauli2018 parcellations.""" - pauli = datasets.fetch_pauli2018(data_dir=tmpdir, verbose=0) - assert all(hasattr(pauli, k) and os.path.isfile(pauli[k]) for k in - ['probabilistic', 'deterministic', 'info']) - - -@pytest.mark.xfail -def test_fetch_ye2020(tmpdir): - """Test fetching of Ye2020 parcellations.""" - pass - - -def test_fetch_voneconomo(tmpdir): - """Test fetching of von Economo parcellations.""" - vek = datasets.fetch_voneconomo(data_dir=tmpdir, verbose=0) - assert all(hasattr(vek, k) and len(vek[k]) == 2 for k in ['gcs', 'ctab']) - assert isinstance(vek.get('info'), str) - - -@pytest.mark.parametrize('dataset, expected', [ - ('celegans', ['conn', 'dist', 'labels', 'ref']), - ('drosophila', ['conn', 'coords', 'labels', 'networks', 'ref']), - ('human_func_scale033', ['conn', 'coords', 'labels', 'ref']), - ('human_func_scale060', ['conn', 'coords', 'labels', 'ref']), - ('human_func_scale125', ['conn', 'coords', 'labels', 'ref']), - ('human_func_scale250', ['conn', 'coords', 'labels', 'ref']), - ('human_func_scale500', ['conn', 'coords', 'labels', 'ref']), - ('human_struct_scale033', ['conn', 'coords', 'dist', 'labels', 'ref']), - ('human_struct_scale060', ['conn', 'coords', 'dist', 'labels', 'ref']), - ('human_struct_scale125', ['conn', 'coords', 'dist', 'labels', 'ref']), - ('human_struct_scale250', ['conn', 'coords', 'dist', 'labels', 'ref']), - ('human_struct_scale500', ['conn', 'coords', 'dist', 'labels', 'ref']), - ('macaque_markov', ['conn', 'dist', 'labels', 'ref']), - ('macaque_modha', ['conn', 'coords', 'dist', 'labels', 'ref']), - 
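The TestFetchAtlas cases above pin down the atlas fetcher outputs; a compact usage sketch matching what they check (assumes network access):

    >>> from netneurotools import datasets
    >>> cammoun = datasets.fetch_cammoun2012('MNI152NLin2009aSym', verbose=0)
    >>> nii = cammoun['scale033']                    # single .nii.gz path per scale
    >>> schaefer = datasets.fetch_schaefer2018('fsaverage5', verbose=0)
    >>> lh, rh = schaefer['400Parcels7Networks']     # per-hemisphere annotation files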
('mouse', ['acronyms', 'conn', 'coords', 'dist', 'labels', 'ref']), - ('rat', ['conn', 'labels', 'ref']), -]) -def test_fetch_famous_gmat(tmpdir, dataset, expected): - """Test fetching of famous G.mat datasets.""" - connectome = datasets.fetch_famous_gmat(dataset, data_dir=tmpdir, verbose=0) - - for key in expected: - assert (key in connectome) - assert isinstance(connectome[key], str if key == 'ref' else np.ndarray) + assert all(k in schaefer + and len(schaefer[k]) == 2 + and all(os.path.isfile(hemi) for hemi in schaefer[k]) + for k in keys) + + def test_fetch_mmpall(self, tmpdir): + """Test fetching of MMPAll parcellations.""" + mmp = datasets.fetch_mmpall(data_dir=tmpdir, verbose=0) + assert len(mmp) == 2 + assert all(os.path.isfile(hemi) for hemi in mmp) + assert all(hasattr(mmp, attr) for attr in ('lh', 'rh')) + + def test_fetch_pauli2018(self, tmpdir): + """Test fetching of Pauli2018 parcellations.""" + pauli = datasets.fetch_pauli2018(data_dir=tmpdir, verbose=0) + assert all(hasattr(pauli, k) and os.path.isfile(pauli[k]) for k in + ['probabilistic', 'deterministic', 'info']) + + @pytest.mark.xfail + def test_fetch_ye2020(self, tmpdir): + """Test fetching of Ye2020 parcellations.""" + assert False + + def test_fetch_voneconomo(self, tmpdir): + """Test fetching of von Economo parcellations.""" + vek = datasets.fetch_voneconomo(data_dir=tmpdir, verbose=0) + assert all(hasattr(vek, k) and len(vek[k]) == 2 for k in ['gcs', 'ctab']) + assert isinstance(vek.get('info'), str) + + +class TestFetchProject: + """Test fetching of project datasets.""" + + def test_fetch_vazquez_rodriguez2019(self, tmpdir): + """Test fetching of Vazquez-Rodriguez2019 dataset.""" + vazquez = datasets.fetch_vazquez_rodriguez2019(data_dir=tmpdir, verbose=0) + for k in ['rsquared', 'gradient']: + assert hasattr(vazquez, k) + assert isinstance(getattr(vazquez, k), np.ndarray) + + @pytest.mark.xfail + def test_fetch_mirchi2018(self, tmpdir): + """Test fetching of Mirchi2018 dataset.""" + assert False + + def test_fetch_hansen_manynetworks(self, tmpdir): + """Test fetching of Hansen et al., 2023 many-networks dataset.""" + hansen = datasets.fetch_hansen_manynetworks(data_dir=tmpdir, verbose=0) + assert "cammoun033" in hansen + assert "gene" in hansen["cammoun033"] + assert isinstance(hansen["cammoun033"]["gene"], Path) + + @pytest.mark.xfail + def test_fetch_hansen_receptors(self, tmpdir): + """Test fetching of Hansen et al., 2022 receptor dataset.""" + # hansen = datasets.fetch_hansen_receptors(data_dir=tmpdir, verbose=0) + assert False + + @pytest.mark.xfail + def test_fetch_hansen_genecognition(self, tmpdir): + """Test fetching of Hansen et al., 2021 gene-cognition dataset.""" + # hansen = datasets.fetch_hansen_genecognition(data_dir=tmpdir, verbose=0) + assert False + + @pytest.mark.xfail + def test_fetch_hansen_brainstem(self, tmpdir): + """Test fetching of Hansen et al., 2024 brainstem dataset.""" + # hansen = datasets.fetch_hansen_brainstem(data_dir=tmpdir, verbose=0) + assert False + + @pytest.mark.xfail + def test_fetch_shafiei_hcpmeg(self, tmpdir): + """Test fetching of Shafiei et al., 2022 & 2023 HCP-MEG dataset.""" + # shafiei = datasets.fetch_shafiei_hcpmeg(data_dir=tmpdir, verbose=0) + assert False + + @pytest.mark.xfail + def test_fetch_suarez_mami(self, tmpdir): + """Test fetching of Suarez et al., 2022 mami dataset.""" + # suarez = datasets.fetch_suarez_mami(data_dir=tmpdir, verbose=0) + assert False + + @pytest.mark.parametrize('dataset, expected', [ + ('celegans', ['conn', 'dist', 'labels', 'ref']), + 
('drosophila', ['conn', 'coords', 'labels', 'networks', 'ref']), + ('human_func_scale033', ['conn', 'coords', 'labels', 'ref']), + ('human_func_scale060', ['conn', 'coords', 'labels', 'ref']), + ('human_func_scale125', ['conn', 'coords', 'labels', 'ref']), + ('human_func_scale250', ['conn', 'coords', 'labels', 'ref']), + ('human_func_scale500', ['conn', 'coords', 'labels', 'ref']), + ('human_struct_scale033', ['conn', 'coords', 'dist', 'labels', 'ref']), + ('human_struct_scale060', ['conn', 'coords', 'dist', 'labels', 'ref']), + ('human_struct_scale125', ['conn', 'coords', 'dist', 'labels', 'ref']), + ('human_struct_scale250', ['conn', 'coords', 'dist', 'labels', 'ref']), + ('human_struct_scale500', ['conn', 'coords', 'dist', 'labels', 'ref']), + ('macaque_markov', ['conn', 'dist', 'labels', 'ref']), + ('macaque_modha', ['conn', 'coords', 'dist', 'labels', 'ref']), + ('mouse', ['acronyms', 'conn', 'coords', 'dist', 'labels', 'ref']), + ('rat', ['conn', 'labels', 'ref']), + ]) + def test_fetch_famous_gmat(self, tmpdir, dataset, expected): + """Test fetching of famous G.mat datasets.""" + connectome = datasets.fetch_famous_gmat(dataset, data_dir=tmpdir, verbose=0) + + expected.remove("ref") + for key in expected: + assert (key in connectome) + assert isinstance(connectome[key], str if key == 'ref' else np.ndarray) From 16bf5ff1291c67d9c535bc9b00b33d1c8441f31e Mon Sep 17 00:00:00 2001 From: Zhen-Qi Liu Date: Tue, 21 May 2024 22:38:36 -0400 Subject: [PATCH 11/32] Fix tests --- MANIFEST.in | 4 +++- netneurotools/datasets/tests/test_datasets_utils.py | 2 +- pyproject.toml | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index 9765c8a..8bd58d4 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,5 @@ include README.rst LICENSE environment.yml requirements.txt include versioneer.py -include netneurotools/datasets/datasets.json \ No newline at end of file +include netneurotools/datasets/datasets.json +include netneurotools/datasets/references.json +include netneurotools/datasets/netneurotools.bib \ No newline at end of file diff --git a/netneurotools/datasets/tests/test_datasets_utils.py b/netneurotools/datasets/tests/test_datasets_utils.py index 15e69cd..9d5d85c 100644 --- a/netneurotools/datasets/tests/test_datasets_utils.py +++ b/netneurotools/datasets/tests/test_datasets_utils.py @@ -10,7 +10,7 @@ ('atl-cammoun2012', ['fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k', 'MNI152NLin2009aSym', 'gcs']), ('tpl-conte69', ['url', 'md5']), - ('atl-pauli2018', ['url', 'md5', 'name']), + ('atl-pauli2018', ['probabilistic', 'deterministic', 'info']), ('tpl-fsaverage', ['fsaverage' + f for f in ['', '3', '4', '5', '6']]), ('atl-schaefer2018', ['fsaverage', 'fsaverage6', 'fsaverage6']) ]) diff --git a/pyproject.toml b/pyproject.toml index 0e1d026..c3c80cf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,7 +73,7 @@ include-package-data = true find = {} [tool.setuptools.package-data] -"*" = ["*.json"] +"*" = ["*.json", "*.bib"] [tool.setuptools.dynamic] version = {attr = "netneurotools.__version__"} From 4eda1a04775d12990733750de71c365396d67728 Mon Sep 17 00:00:00 2001 From: Zhen-Qi Liu Date: Tue, 21 May 2024 23:18:00 -0400 Subject: [PATCH 12/32] Fix style --- netneurotools/datasets/datasets_utils.py | 31 ++++++++++++---------- netneurotools/datasets/fetch_atlas.py | 31 ++++++++++++---------- netneurotools/datasets/fetch_project.py | 10 ++++--- netneurotools/datasets/fetch_template.py | 28 +++++++++++-------- netneurotools/datasets/tests/test_fetch.py | 2 
+- netneurotools/networks/networks_utils.py | 20 ++++++++------ pyproject.toml | 1 - 7 files changed, 70 insertions(+), 53 deletions(-) diff --git a/netneurotools/datasets/datasets_utils.py b/netneurotools/datasets/datasets_utils.py index a6e9cf5..82bd228 100644 --- a/netneurotools/datasets/datasets_utils.py +++ b/netneurotools/datasets/datasets_utils.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Utilites for loading / creating datasets.""" import json @@ -122,8 +121,10 @@ def _get_dataset_info(name): try: return NNT_DATASETS[name] except KeyError: - raise KeyError("Provided dataset '{}' is not valid. Must be one of: {}" - .format(name, sorted(NNT_DATASETS.keys()))) from None + raise KeyError( + f"Provided dataset {name} is not valid. " + f"Must be one of: {sorted(NNT_DATASETS.keys())}" + ) from None NNT_REFERENCES = _load_resource_json('datasets/references.json') @@ -154,10 +155,11 @@ def _get_reference_info(name, verbose=1, return_dict=False): if return_dict: return curr_refs - except KeyError: - raise KeyError("Provided dataset '{}' is not valid. Must be one of: {}" - .format(name, sorted(NNT_REFERENCES.keys()))) from None + raise KeyError( + f"Provided dataset {name} is not valid. " + f"Must be one of: {sorted(NNT_REFERENCES.keys())}" + ) from None def _fill_reference_json(bib_file, json_file, overwrite=False, use_defaults=False): @@ -245,9 +247,10 @@ def _check_freesurfer_subjid(subject_id, subjects_dir=None): subjdir = os.path.join(subjects_dir, subject_id) if not os.path.isdir(subjdir): - raise FileNotFoundError('Cannot find specified subject id {} in ' - 'provided subject directory {}.' - .format(subject_id, subjects_dir)) + raise FileNotFoundError( + f'Cannot find specified subject id {subject_id} in ' + f'provided subject directory {subjects_dir}.' + ) return subject_id, subjects_dir @@ -276,11 +279,11 @@ def _get_freesurfer_subjid(subject_id, subjects_dir=None): subject_id, subjects_dir = _check_freesurfer_subjid(subject_id, subjects_dir) except FileNotFoundError: if 'fsaverage' not in subject_id: - raise ValueError('Provided subject {} does not exist in provided ' - 'subjects_dir {}' - .format(subject_id, subjects_dir)) from None - from ..datasets import fetch_fsaverage - from ..datasets import _get_data_dir + raise ValueError( + f'Provided subject {subject_id} does not exist in provided ' + f'subjects_dir {subjects_dir}' + ) from None + from .fetch_template import fetch_fsaverage fetch_fsaverage(subject_id) subjects_dir = os.path.join(_get_data_dir(), 'tpl-fsaverage') subject_id, subjects_dir = _check_freesurfer_subjid(subject_id, subjects_dir) diff --git a/netneurotools/datasets/fetch_atlas.py b/netneurotools/datasets/fetch_atlas.py index 2b3feec..5df542c 100644 --- a/netneurotools/datasets/fetch_atlas.py +++ b/netneurotools/datasets/fetch_atlas.py @@ -88,9 +88,10 @@ def fetch_cammoun2012( 'MNI152NLin2009aSym' ] if version not in versions: - raise ValueError('The version of Cammoun et al., 2012 parcellation ' - 'requested "{}" does not exist. Must be one of {}' - .format(version, versions)) + raise ValueError( + f'The version of Cammoun et al., 2012 parcellation ' + f'requested {version} does not exist. 
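For context, the info dictionaries consulted by these fetchers come from datasets.json via _get_dataset_info; a small sketch of the key layout relied on throughout the patch (url/md5 per template version):

    >>> from netneurotools.datasets.datasets_utils import _get_dataset_info
    >>> info = _get_dataset_info('tpl-fsaverage')['fsaverage5']
    >>> url, md5 = info['url'], info['md5']   # passed to fetch_files via opts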
Must be one of {versions}' + ) dataset_name = 'atl-cammoun2012' _get_reference_info(dataset_name, verbose=verbose) @@ -102,7 +103,7 @@ def fetch_cammoun2012( opts = { 'uncompress': True, 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset_name) + 'move': f'{dataset_name}.tar.gz' } # filenames differ based on selected version of dataset @@ -202,15 +203,16 @@ def fetch_schaefer2018( """ versions = ['fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k'] if version not in versions: - raise ValueError('The version of Schaefer et al., 2018 parcellation ' - 'requested "{}" does not exist. Must be one of {}' - .format(version, versions)) + raise ValueError( + f'The version of Schaefer et al., 2018 parcellation ' + f'requested "{version}" does not exist. Must be one of {versions}' + ) dataset_name = 'atl-schaefer2018' _get_reference_info(dataset_name, verbose=verbose) keys = [ - '{}Parcels{}Networks'.format(p, n) + f'{p}Parcels{n}Networks' for p in range(100, 1001, 100) for n in [7, 17] ] @@ -219,7 +221,7 @@ def fetch_schaefer2018( opts = { 'uncompress': True, 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset_name) + 'move': f'{dataset_name}.tar.gz' } if version == 'fslr32k': @@ -292,9 +294,10 @@ def fetch_mmpall( """ versions = ['fslr32k'] if version not in versions: - raise ValueError('The version of Glasser et al., 2016 parcellation ' - 'requested "{}" does not exist. Must be one of {}' - .format(version, versions)) + raise ValueError( + f'The version of Glasser et al., 2016 parcellation ' + f'requested "{version}" does not exist. Must be one of {versions}' + ) dataset_name = 'atl-mmpall' _get_reference_info(dataset_name, verbose=verbose) @@ -304,7 +307,7 @@ def fetch_mmpall( opts = { 'uncompress': True, 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset_name) + 'move': f'{dataset_name}.tar.gz' } _filenames = [ @@ -430,7 +433,7 @@ def fetch_voneconomo(data_dir=None, url=None, resume=True, verbose=1): opts = { 'uncompress': True, 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset_name) + 'move': f'{dataset_name}.tar.gz' } _filenames = [ diff --git a/netneurotools/datasets/fetch_project.py b/netneurotools/datasets/fetch_project.py index e9a592d..ce3def1 100644 --- a/netneurotools/datasets/fetch_project.py +++ b/netneurotools/datasets/fetch_project.py @@ -61,7 +61,7 @@ def fetch_vazquez_rodriguez2019(data_dir=None, resume=True, verbose=1): opts = { 'uncompress': True, 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset_name) + 'move': f'{dataset_name}.tar.gz' } fetched = fetch_files( data_dir, @@ -309,8 +309,10 @@ def fetch_famous_gmat( ] if dataset not in available_connectomes: - raise ValueError('Provided dataset {} not available; must be one of {}' - .format(dataset, available_connectomes)) + raise ValueError( + f'Provided dataset {dataset} not available; ' + f'must be one of {available_connectomes}' + ) base_dataset_name = 'ds-famous_gmat' _get_reference_info(base_dataset_name, verbose=verbose) @@ -320,7 +322,7 @@ def fetch_famous_gmat( opts = { 'uncompress': True, 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(base_dataset_name) + 'move': f'{base_dataset_name}.tar.gz' } fetched = fetch_files( data_dir, diff --git a/netneurotools/datasets/fetch_template.py b/netneurotools/datasets/fetch_template.py index 1380574..fca331e 100644 --- a/netneurotools/datasets/fetch_template.py +++ b/netneurotools/datasets/fetch_template.py @@ -71,8 +71,10 @@ def fetch_fsaverage( 'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', 'fsaverage6' ] if version not in 
versions: - raise ValueError('The version of fsaverage requested "{}" does not ' - 'exist. Must be one of {}'.format(version, versions)) + raise ValueError( + f'The version of fsaverage requested {version} does not ' + f'exist. Must be one of {versions}' + ) dataset_name = 'tpl-fsaverage' _get_reference_info(dataset_name, verbose=verbose) @@ -84,7 +86,7 @@ def fetch_fsaverage( opts = { 'uncompress': True, 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset_name) + 'move': f'{dataset_name}.tar.gz' } _filenames = [ @@ -158,7 +160,7 @@ def fetch_hcp_standards(data_dir=None, resume=True, verbose=1): opts = { 'uncompress': True, 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset_name) + 'move': f'{dataset_name}.tar.gz' } fetched = fetch_files( data_dir, @@ -229,12 +231,16 @@ def fetch_civet( """ densities = ['41k', '164k'] if density not in densities: - raise ValueError('The density of CIVET requested "{}" does not exist. ' - 'Must be one of {}'.format(density, densities)) + raise ValueError( + f'The density of CIVET requested "{density}" does not exist. ' + f'Must be one of {densities}' + ) versions = ['v1', 'v2'] if version not in versions: - raise ValueError('The version of CIVET requested "{}" does not exist. ' - 'Must be one of {}'.format(version, versions)) + raise ValueError( + f'The version of CIVET requested "{version}" does not exist. ' + f'Must be one of {versions}' + ) if version == 'v1' and density == '164k': raise ValueError('The "164k" density CIVET surface only exists for ' @@ -251,7 +257,7 @@ def fetch_civet( opts = { 'uncompress': True, 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset_name) + 'move': f'{dataset_name}.tar.gz' } _filenames = [ @@ -317,7 +323,7 @@ def fetch_conte69(data_dir=None, resume=True, verbose=1): opts = { 'uncompress': True, 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset_name) + 'move': f'{dataset_name}.tar.gz' } _filenames = [ @@ -386,7 +392,7 @@ def fetch_yerkes19(data_dir=None, resume=None, verbose=1): opts = { 'uncompress': True, 'md5sum': info['md5'], - 'move': '{}.tar.gz'.format(dataset_name) + 'move': f'{dataset_name}.tar.gz' } _filenames = [ f"{dataset_name}/tpl-yerkes19_space-fsLR32k_{res}.{hemi}.surf.gii" diff --git a/netneurotools/datasets/tests/test_fetch.py b/netneurotools/datasets/tests/test_fetch.py index 20f7301..4d14b21 100644 --- a/netneurotools/datasets/tests/test_fetch.py +++ b/netneurotools/datasets/tests/test_fetch.py @@ -88,7 +88,7 @@ def test_fetch_cammoun2012(self, tmpdir, version, expected): def test_fetch_schaefer2018(self, tmpdir, version): """Test fetching of Schaefer2018 parcellations.""" keys = [ - '{}Parcels{}Networks'.format(p, n) + '{p}Parcels{n}Networks' for p in range(100, 1001, 100) for n in [7, 17] ] schaefer = datasets.fetch_schaefer2018(version, data_dir=tmpdir, verbose=0) diff --git a/netneurotools/networks/networks_utils.py b/netneurotools/networks/networks_utils.py index 5f72c50..5085e55 100644 --- a/netneurotools/networks/networks_utils.py +++ b/netneurotools/networks/networks_utils.py @@ -59,8 +59,9 @@ def binarize_network(network, retain=10, keep_diag=False): netneurotools.networks.threshold_network """ if retain < 0 or retain > 100: - raise ValueError('Value provided for `retain` is outside [0, 100]: {}' - .format(retain)) + raise ValueError( + f'Value provided for `retain` is outside [0, 100]: {retain}' + ) prctile = 100 - retain triu = get_triu(network) @@ -97,8 +98,10 @@ def threshold_network(network, retain=10): netneurotools.networks.binarize_network """ if retain 
< 0 or retain > 100: - raise ValueError('Value provided for `retain` must be a percent ' - 'in range [0, 100]. Provided: {}'.format(retain)) + raise ValueError( + f'Value provided for `retain` must be a percent ' + f'in range [0, 100]. Provided: {retain}' + ) # get number of nodes in graph and invert weights (MINIMUM spanning tree) nodes = len(network) @@ -111,10 +114,11 @@ def threshold_network(network, retain=10): # determine # of remaining edges and ensure we're not over the limit remain = int((retain / 100) * ((nodes * (nodes - 1)) / 2)) - mst_edges if remain < 0: - raise ValueError('Minimum spanning tree with {} edges exceeds desired ' - 'connection density of {}% ({} edges). Cannot ' - 'proceed with graph creation.' - .format(mst_edges, retain, remain + mst_edges)) + raise ValueError( + f'Minimum spanning tree with {mst_edges} edges exceeds desired ' + f'connection density of {retain}% ({remain + mst_edges} edges). Cannot ' + f'proceed with graph creation.' + ) # zero out edges already in MST and then get indices of next best edges graph -= mst diff --git a/pyproject.toml b/pyproject.toml index c3c80cf..653beee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -105,7 +105,6 @@ ignore = [ # "W605", # Invalid escape sequence: latex "NPY002", # Replace legacy `np.random` call with `np.random.Generator` ] -extend-select = ["E302"] preview = true [tool.ruff.lint.pydocstyle] From 32335f7d8b64b9cac5eb39100452ff0e0e3ab1a8 Mon Sep 17 00:00:00 2001 From: Zhen-Qi Liu Date: Wed, 22 May 2024 18:17:10 -0400 Subject: [PATCH 13/32] Fix tests --- netneurotools/datasets/tests/test_fetch.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/netneurotools/datasets/tests/test_fetch.py b/netneurotools/datasets/tests/test_fetch.py index 4d14b21..2a86a0c 100644 --- a/netneurotools/datasets/tests/test_fetch.py +++ b/netneurotools/datasets/tests/test_fetch.py @@ -88,7 +88,7 @@ def test_fetch_cammoun2012(self, tmpdir, version, expected): def test_fetch_schaefer2018(self, tmpdir, version): """Test fetching of Schaefer2018 parcellations.""" keys = [ - '{p}Parcels{n}Networks' + f'{p}Parcels{n}Networks' for p in range(100, 1001, 100) for n in [7, 17] ] schaefer = datasets.fetch_schaefer2018(version, data_dir=tmpdir, verbose=0) @@ -96,10 +96,11 @@ def test_fetch_schaefer2018(self, tmpdir, version): if version == 'fslr32k': assert all(k in schaefer and os.path.isfile(schaefer[k]) for k in keys) else: - assert all(k in schaefer - and len(schaefer[k]) == 2 - and all(os.path.isfile(hemi) for hemi in schaefer[k]) - for k in keys) + for k in keys: + assert k in schaefer + assert len(schaefer[k]) == 2 + assert all(os.path.isfile(hemi) for hemi in schaefer[k]) + def test_fetch_mmpall(self, tmpdir): """Test fetching of MMPAll parcellations.""" From 9f06f934708b211e166106ad42c84908dd466248 Mon Sep 17 00:00:00 2001 From: Zhen-Qi Liu Date: Wed, 22 May 2024 18:18:37 -0400 Subject: [PATCH 14/32] Fix style --- netneurotools/datasets/tests/test_fetch.py | 1 - 1 file changed, 1 deletion(-) diff --git a/netneurotools/datasets/tests/test_fetch.py b/netneurotools/datasets/tests/test_fetch.py index 2a86a0c..b55e58d 100644 --- a/netneurotools/datasets/tests/test_fetch.py +++ b/netneurotools/datasets/tests/test_fetch.py @@ -101,7 +101,6 @@ def test_fetch_schaefer2018(self, tmpdir, version): assert len(schaefer[k]) == 2 assert all(os.path.isfile(hemi) for hemi in schaefer[k]) - def test_fetch_mmpall(self, tmpdir): """Test fetching of MMPAll parcellations.""" mmp = datasets.fetch_mmpall(data_dir=tmpdir, 
verbose=0) From 9123745f54bc54db33f4bcdcc82c01a47ee68cbe Mon Sep 17 00:00:00 2001 From: Zhen-Qi Liu Date: Tue, 5 Nov 2024 17:05:27 -0500 Subject: [PATCH 15/32] Fix pyproject.toml --- pyproject.toml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 653beee..bc3219f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,8 +69,11 @@ build-backend = "setuptools.build_meta" [tool.setuptools] include-package-data = true -[tool.setuptools.packages] -find = {} +[tool.setuptools.packages.find] +include = [ + "netneurotools", + "netneurotools.*" +] [tool.setuptools.package-data] "*" = ["*.json", "*.bib"] @@ -123,4 +126,4 @@ omit = [ [tool.codespell] skip = '.git,*.pdf,*.svg,versioneer.py' -# ignore-words-list = '' \ No newline at end of file +# ignore-words-list = '' From 3a16eca2dfe16ea092a658f2b12a98bca48570c6 Mon Sep 17 00:00:00 2001 From: Zhen-Qi Liu Date: Tue, 5 Nov 2024 17:07:23 -0500 Subject: [PATCH 16/32] Fix error --- netneurotools/stats/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/netneurotools/stats/__init__.py b/netneurotools/stats/__init__.py index e000548..bc35e4d 100644 --- a/netneurotools/stats/__init__.py +++ b/netneurotools/stats/__init__.py @@ -27,7 +27,7 @@ __all__ = [ # correlation - 'efficient_pearsonr', 'weighted_pearsonr', 'make_correlated_xy' + 'efficient_pearsonr', 'weighted_pearsonr', 'make_correlated_xy', # permutation_test 'permtest_1samp', 'permtest_rel', 'permtest_pearsonr', # regression From 1420d85d22687ae59a38db0656ff2ad3a2a18ebe Mon Sep 17 00:00:00 2001 From: Zhen-Qi Liu Date: Tue, 5 Nov 2024 17:46:27 -0500 Subject: [PATCH 17/32] Fix numpy changes --- netneurotools/stats/permutation_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/netneurotools/stats/permutation_test.py b/netneurotools/stats/permutation_test.py index 147e4d1..9ff4434 100644 --- a/netneurotools/stats/permutation_test.py +++ b/netneurotools/stats/permutation_test.py @@ -167,7 +167,7 @@ def permtest_rel(a, b, axis=0, n_perm=1000, seed=0): abs_true = np.abs(true_diff) # idx array - reidx = np.meshgrid(*[range(f) for f in ab.shape], indexing='ij') + reidx = list(np.meshgrid(*[range(f) for f in ab.shape], indexing='ij')) permutations = np.ones(true_diff.shape) for _ in range(n_perm): From fd3b03d652f3c407495a1dd1cd7ddf83ccc104cc Mon Sep 17 00:00:00 2001 From: Zhen-Qi Liu Date: Fri, 8 Nov 2024 12:25:25 -0500 Subject: [PATCH 18/32] [MNT] Update redocs/requirements.txt --- docs/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 218744e..0162e19 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,5 +1,5 @@ -r ../requirements.txt -sphinx>=2.0, <7.0.0 +sphinx>=2.0 sphinx_rtd_theme sphinx-gallery pillow From 2d68c5c984ca836090376f4029e28c98acab3b8b Mon Sep 17 00:00:00 2001 From: Zhen-Qi Liu Date: Fri, 8 Nov 2024 14:46:46 -0500 Subject: [PATCH 19/32] Update README.rst --- README.rst | 81 ++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 57 insertions(+), 24 deletions(-) diff --git a/README.rst b/README.rst index e8a6f50..b5dd225 100644 --- a/README.rst +++ b/README.rst @@ -30,49 +30,82 @@ usage in the `Network Neuroscience Lab `_, housed in the `Brain Imaging Centre `_ at `McGill University `_. -.. _installation: -Installation ------------- +.. 
_features: -Install directly from PyPi with :code:`pip install netneurotools` or install the main branch with +Features +-------- -.. code-block:: bash +Netneurotools provides a wide range of tools for network neuroscience research. - git clone https://github.com/netneurolab/netneurotools.git - cd netneurotools - pip install . +* A range of useful datasets fetchers -.. _features: + * Templates and atlases + * Multimodal annotations + * Data from our published papers -Features --------- +* Network construction: empirical and surrogate + + * Consensus networks + * Randomized (null) networks + +* Network (graph) metrics calculation -* Network neuroscience metrics: up-to-date and optimized + * BCT-like metrics: up-to-date and optimized + * Agent-based models + * Statistical metrics - * Network communication - * Null networks +* Brain and network visualization -* Brain plotting functions: easy to use and customize + * Surface visualization + * 3D point brain + * Sorted communities - * Surface visualization - `plot_fsaverage `_ - and `plot_fslr `_ - * 3D point brain `plot_point_brain `_ - * Sorted communities `plot_mod_heatmap `_ +* Network modularity workflows -* Statistics functions + * Consensus modules - * Dominance analysis `get_dominance_stats `_ +* Spatial statistics -* Fetchers for common datasets + * Spatial autocorrelation -* Utilities for working with FreeSurfer and CIVET +* Optimized statistics routines + * Correlation estimation + * Permutation testing + +* Convenient interface for external tools + +* A place for experimental procedures Check out our `documentation `_ for more information! + +.. _installation: + +Installation +------------ + +This package is under active development. We recommend installing the latest version +with + +.. code-block:: bash + + pip install git+https://github.com/netneurolab/netneurotools.git + + +It is also possible to install directly from PyPi with :code:`pip install netneurotools`. + +If you are looking for the earlier version of the toolbox before the recent breaking changes, +you can install it with + +.. code-block:: bash + + pip install git+https://github.com/netneurolab/netneurotools.git@0.2.X + + + .. _development: Development From c41dc01945e97182c379b342ffaa9f7e000dea9f Mon Sep 17 00:00:00 2001 From: Zhen-Qi Liu Date: Fri, 8 Nov 2024 14:49:28 -0500 Subject: [PATCH 20/32] Update README.rst to streamline content and improve clarity --- README.rst | 36 +++++------------------------------- 1 file changed, 5 insertions(+), 31 deletions(-) diff --git a/README.rst b/README.rst index b5dd225..01c3a0f 100644 --- a/README.rst +++ b/README.rst @@ -40,43 +40,18 @@ Netneurotools provides a wide range of tools for network neuroscience research. 
* A range of useful datasets fetchers - * Templates and atlases - * Multimodal annotations - * Data from our published papers - * Network construction: empirical and surrogate - * Consensus networks - * Randomized (null) networks - -* Network (graph) metrics calculation - - * BCT-like metrics: up-to-date and optimized - * Agent-based models - * Statistical metrics +* Network (graph) metrics calculation: up-to-date and optimized * Brain and network visualization - * Surface visualization - * 3D point brain - * Sorted communities - -* Network modularity workflows - - * Consensus modules - -* Spatial statistics - - * Spatial autocorrelation - * Optimized statistics routines - * Correlation estimation - * Permutation testing - * Convenient interface for external tools -* A place for experimental procedures +* And much more! + Check out our `documentation `_ for more information! @@ -87,6 +62,8 @@ for more information! Installation ------------ +You can install directly from PyPi with :code:`pip install netneurotools`. + This package is under active development. We recommend installing the latest version with @@ -95,8 +72,6 @@ with pip install git+https://github.com/netneurolab/netneurotools.git -It is also possible to install directly from PyPi with :code:`pip install netneurotools`. - If you are looking for the earlier version of the toolbox before the recent breaking changes, you can install it with @@ -105,7 +80,6 @@ you can install it with pip install git+https://github.com/netneurolab/netneurotools.git@0.2.X - .. _development: Development From 6f11e8d762aee4752b57a86ec78615430f63b879 Mon Sep 17 00:00:00 2001 From: Zhen-Qi Liu Date: Mon, 11 Nov 2024 19:03:54 -0500 Subject: [PATCH 21/32] Refactor dataset download functions to improve readability --- README.rst | 3 +- netneurotools/datasets/_mirchi2018.py | 4 +- netneurotools/datasets/datasets.json | 252 +++++++------ netneurotools/datasets/datasets_utils.py | 183 +++++++--- netneurotools/datasets/fetch_atlas.py | 331 +++++++----------- netneurotools/datasets/fetch_project.py | 165 ++++----- netneurotools/datasets/fetch_template.py | 256 ++++++-------- .../datasets/tests/test_datasets_utils.py | 1 - netneurotools/datasets/tests/test_fetch.py | 8 +- 9 files changed, 588 insertions(+), 615 deletions(-) diff --git a/README.rst b/README.rst index 01c3a0f..49b7f88 100644 --- a/README.rst +++ b/README.rst @@ -26,7 +26,7 @@ netneurotools: Tools for network neuroscience | This toolbox is a collection of functions written in Python that get frequent -usage in the `Network Neuroscience Lab `_, housed in +usage in the `Network Neuroscience Lab `_, housed in the `Brain Imaging Centre `_ at `McGill University `_. @@ -92,6 +92,7 @@ you've found a bug, are experiencing a problem, or have a question, create a new `issue `_ with some information about it and one of our team members will do our best to help you. + .. _licensing: License Information diff --git a/netneurotools/datasets/_mirchi2018.py b/netneurotools/datasets/_mirchi2018.py index 9addfea..60e0c09 100644 --- a/netneurotools/datasets/_mirchi2018.py +++ b/netneurotools/datasets/_mirchi2018.py @@ -67,7 +67,7 @@ } -def _get_fc(data_dir=None, resume=True, verbose=1): +def _get_fc(verbose=1): """ Get functional connections from MyConnectome parcelled time series data. @@ -95,7 +95,7 @@ def _get_fc(data_dir=None, resume=True, verbose=1): return np.vstack(fc) -def _get_panas(data_dir=None, resume=True, verbose=1): +def _get_panas(): """ Get PANAS subscales from MyConnectome behavioral data. 
diff --git a/netneurotools/datasets/datasets.json b/netneurotools/datasets/datasets.json index e9fdd66..d0183b8 100644 --- a/netneurotools/datasets/datasets.json +++ b/netneurotools/datasets/datasets.json @@ -3,140 +3,145 @@ "gcs": { "url-type": "osf", "url": [ - "mb37e", - "5ce6bb4423fec40017e82c5e" + "udpv8", + "67326ef9ee12e85a3662f7c4" ], - "md5": "266c4520af768e766328fb8e6648005d" + "md5": "a9a0779258c170805e4690394802a707", + "uncompress": true, + "rel-path": "atl-cammoun2012/gcs" }, "fsaverage": { "url-type": "osf", "url": [ - "mb37e", - "5ce6c30523fec40017e83439" + "udpv8", + "67326ef5c41abfb7cd0ddf1d" ], - "md5": "2a19eb4744c0ce6c243f721bd43ecff0" + "md5": "a67cad69c51749240d4b1b0100f429f5", + "uncompress": true, + "rel-path": "atl-cammoun2012/fsaverage" }, "fsaverage5": { "url-type": "osf", "url": [ - "mb37e", - "5e189a1c57341903868036dd" + "udpv8", + "67326ef5b1dd1a8ca6b2ef68" ], - "md5": "2afb22e1887d47f1ca81c340fff7692b" + "md5": "2648b4d14461128c0889d9b9ad8ec349", + "uncompress": true, + "rel-path": "atl-cammoun2012/fsaverage5" }, "fsaverage6": { "url-type": "osf", "url": [ - "mb37e", - "5e189a1b5734190380804072" + "udpv8", + "67326ef766af3db75e62f62f" ], - "md5": "1df743bff13316f67bd41d13ec691c97" + "md5": "a810eed6e19a6ccbf01253312da1f291", + "uncompress": true, + "rel-path": "atl-cammoun2012/fsaverage6" }, "MNI152NLin2009aSym": { "url-type": "osf", "url": [ - "mb37e", - "5e2f4bf0e71ef800301880c2" + "udpv8", + "67326efaee12e85a3662f7c6" ], - "md5": "9da30bad22d732aa5f00a6d178d087c4" + "md5": "7cc281d6916baf78bb53b12806d28454", + "uncompress": true, + "rel-path": "atl-cammoun2012/MNI152NLin2009aSym" }, "fslr32k": { "url-type": "osf", "url": [ - "mb37e", - "5e2f4bf1e71ef80027189c56" + "udpv8", + "67326ef7bbbfa7f6feb2f071" ], - "md5": "a5177319d5e0b8825a91d503ded1a59e" + "md5": "679575fbc83474730fd77fa17a522ed4", + "uncompress": true, + "rel-path": "atl-cammoun2012/fslr32k" } }, "atl-pauli2018": { - "probabilistic": { - "url-type": "osf", - "url": [ - "jkzwp", - "5b11fa3364f25a001973dce0" - ], - "md5": "62dd6ff405d3a8b89ee188cafa3a7f6a", - "folder-name": "atl-pauli2018", - "file-name": "atl-Pauli2018_space-MNI152NLin2009cAsym_hemi-both_probabilistic.nii.gz" - }, - "deterministic": { - "url-type": "osf", - "url": [ - "jkzwp", - "5b11fa2ff1f288000e625a7f" - ], - "md5": "5a5b6246921be08456304875447c68ed", - "folder-name": "atl-pauli2018", - "file-name": "atl-Pauli2018_space-MNI152NLin2009cAsym_hemi-both_deterministic.nii.gz" - }, - "info": { - "url-type": "osf", - "url": [ - "mb37e", - "5c93b4f034062c001b1ef50d" - ], - "md5": "390a693abeb1a583151f30aa8798bab5", - "folder-name": "atl-pauli2018", - "file-name": "atl-Pauli2018_space-MNI152NLin2009cAsym_info.csv" - } + "url-type": "osf", + "url": [ + "udpv8", + "6732834503ad88c31bb2f1c7" + ], + "md5": "214a9c4381919c00af1f0b97ea9f8ed7", + "uncompress": true, + "rel-path": "atl-pauli2018" }, "tpl-conte69": { "url-type": "osf", "url": [ - "fvuh8", - "5b198ec5ec24e20011b48548" + "udpv8", + "67327010ad7d0b0b8c4d6d92" ], - "md5": "bd944e3f9f343e0e51e562b440960529" + "md5": "bd944e3f9f343e0e51e562b440960529", + "uncompress": true, + "rel-path": "tpl-conte69" + }, "tpl-yerkes19": { "url-type": "osf", "url": [ - "mb37e", - "60ae93d504e91a005f1761ab" + "udpv8", + "67327067f8f06756c60ddedd" ], - "md5": "9ee4f1605fb690a85b04b61549d62925" + "md5": "9ee4f1605fb690a85b04b61549d62925", + "uncompress": true, + "rel-path": "tpl-yerkes19" }, "tpl-fsaverage": { "fsaverage": { "url-type": "osf", "url": [ - "mb37e", - "5c82830a1d73810018bdacea" + 
"udpv8", + "67327113dab4e926760ddf37" ], - "md5": "1e82c52ed21d06d4e6e7341c725c5262" + "md5": "a92a40fc0db1dfd88159cbdf517a25da", + "uncompress": true, + "rel-path": "tpl-fsaverage/fsaverage" }, "fsaverage3": { "url-type": "osf", "url": [ - "mb37e", - "5d9f83b6f6b03e000e1ba285" + "udpv8", + "673271131ad23303434d6f68" ], - "md5": "b4182495d341364e3f7c5b86284d8d20" + "md5": "93c6d815aca12da38233c8b70a6e31e1", + "uncompress": true, + "rel-path": "tpl-fsaverage/fsaverage3" }, "fsaverage4": { "url-type": "osf", "url": [ - "mb37e", - "5d9f83b7fcf91f00111c7473" + "udpv8", + "67327115ad7d0b0b8c4d6e2c" ], - "md5": "5a481421dc1286c7bd9b8a47db5fad0b" + "md5": "7f7322d97d2e7bd8fb17dd09803f0e07", + "uncompress": true, + "rel-path": "tpl-fsaverage/fsaverage4" }, "fsaverage5": { "url-type": "osf", "url": [ - "mb37e", - "5d9f83b6f6b03e00101c932f" + "udpv8", + "67327116b8b930019c62fd6b" ], - "md5": "cc75f7290c03970a8b8a06dfc215e925" + "md5": "b79f2583917e2a5f79a4768e2b6e27e6", + "uncompress": true, + "rel-path": "tpl-fsaverage/fsaverage5" }, "fsaverage6": { "url-type": "osf", "url": [ - "mb37e", - "5d9f83b7a7bc73000cea05f1" + "udpv8", + "67327118c1e3bbde16b2f05f" ], - "md5": "8f75b95c0e47ae935d10745baefa2c49" + "md5": "a4695232aa1a1bad644f9c5dbf52eca5", + "uncompress": true, + "rel-path": "tpl-fsaverage/fsaverage6" } }, "tpl-civet": { @@ -144,114 +149,131 @@ "civet41k": { "url-type": "osf", "url": [ - "mb37e", - "601daffd84ecf800fe031868" + "udpv8", + "673272bedab4e926760ddf70" ], - "md5": "b27219c876464992e1b61da1c60d8d6e" + "md5": "57feb3db87e81508648a73e72ac427d6", + "uncompress": true, + "rel-path": "tpl-civet/v1/civet41k" } }, "v2": { "civet41k": { "url-type": "osf", "url": [ - "mb37e", - "601dafe77ad0a80119d9483c" + "udpv8", + "673272f252f6f10b568ea5ae" ], - "md5": "a47b015e471c6a800d236f107fda5b4a" + "md5": "a91a092c4a92e95f817230863cf05a2e", + "uncompress": true, + "rel-path": "tpl-civet/v2/civet41k" }, "civet164k": { "url-type": "osf", "url": [ - "mb37e", - "601dafe87ad0a8011ad94938" + "udpv8", + "673272f266af3db75e62fb33" ], - "md5": "02537ea65d5366acd8de729022a34bab" + "md5": "2c26ea033a905253b734889d08e1efdd", + "uncompress": true, + "rel-path": "tpl-civet/v2/civet164k" } } }, "ds-famous_gmat": { "url-type": "osf", "url": [ - "mb37e", - "664683ca4664da9ebced6b70" + "udpv8", + "67327344a6dba6adff62f48c" ], - "md5": "b803de1058579881a759f475704e9f35" + "md5": "b803de1058579881a759f475704e9f35", + "uncompress": true, + "rel-path": "ds-famous_gmat" }, "ds-vazquez_rodriguez2019": { "url-type": "osf", "url": [ - "mb37e", - "5d9f5aa4f6b03e000e1b819e" + "udpv8", + "6732736066af3db75e62fb5b" ], - "md5": "c710365a2cc5cddb8a2fbb5f6ae421a3" + "md5": "c710365a2cc5cddb8a2fbb5f6ae421a3", + "uncompress": true, + "rel-path": "ds-vazquez_rodriguez2019" }, "atl-schaefer2018": { "fsaverage": { "url-type": "osf", "url": [ - "mb37e", - "5dbc8d7dcfc96c000dc3581c" + "udpv8", + "673273c54b79ea9a5062f4b3" ], - "md5": "74dfe4237efaccabf057897c49e8af94" + "md5": "de53b3faaa076442ae8ddb5ef62d79e8", + "uncompress": true, + "rel-path": "atl-schaefer2018/fsaverage" }, "fsaverage5": { "url-type": "osf", "url": [ - "mb37e", - "5dbc8d7daf84c3000eebffb2" + "udpv8", + "673273c550aeaa53a10dded5" ], - "md5": "45a8c784f1979eb33a119bdab912a51f" + "md5": "838dd438e9d10d0dda055b32d02a63f9", + "uncompress": true, + "rel-path": "atl-schaefer2018/fsaverage5" }, "fsaverage6": { "url-type": "osf", "url": [ - "mb37e", - "5dbc8d7bcfc96c000ec6dca2" + "udpv8", + "673273c72e830a5cbd0dde84" ], - "md5": "8738daccab4648c3e891a1c8d3a9ec1f" + "md5": 
"06461e983ba10c64621cae6250c0c8cd", + "uncompress": true, + "rel-path": "atl-schaefer2018/fsaverage6" }, "fslr32k": { "url-type": "osf", "url": [ - "mb37e", - "5e3086e4af75930094bdd507" + "udpv8", + "673273c790023be9c44d6b5a" ], - "md5": "d8378f33107ed5d98c27e8070ebb5aa2" + "md5": "770401d8fdcec6ca05f797e77230338e", + "uncompress": true, + "rel-path": "atl-schaefer2018/fslr32k" } }, "atl-mmpall": { "fslr32k": { "url-type": "osf", "url": [ - "mb37e", - "6047bac259e910009b83114f" + "udpv8", + "6732741eb9a26212244d6bbb" ], - "md5": "fd641742685a239d9c3f60e19a280ca2" + "md5": "38371de50d8942dcf464d2b9c22edebf", + "uncompress": true, + "rel-path": "atl-mmpall/fslr32k" } }, "atl-voneconomo_koskinas": { "url-type": "osf", "url": [ - "mb37e", - "5ed80005fabc45000d639900" + "udpv8", + "6732744252f6f10b568ea607" ], - "md5": "67085e2577d21dc3a742f4fcde6e3b18" + "md5": "67085e2577d21dc3a742f4fcde6e3b18", + "uncompress": true, + "rel-path": "atl-voneconomo_koskinas" }, "tpl-hcp_standards": { "standard_mesh_atlases": { "url-type": "osf", "url": [ - "mb37e", - "6643d2ab2eacc48a57097091" + "udpv8", + "6732749850aeaa53a10ddf2c" ], - "md5": "806abac71f76b8dba8af467ef313c3f7", - "keys": [ - "fs_LR_32k", - "fsaverage", - "fsaverage5", - "fsaverage6", - "MNI152NLin2009cAsym" - ] + "md5": "c13bf257f0a7dea7955a83577cfe6659", + "uncompress": true, + "rel-path": "tpl-hcp_standards/standard_mesh_atlases" } }, "ds-hansen_manynetworks": { @@ -261,7 +283,9 @@ "hansen_many_networks", "v1.0.0" ], - "folder-name": "hansen_many_networks-1.0.0", - "md5": "9e503c759506293aa441054cfd206ccc" + "md5": "9e503c759506293aa441054cfd206ccc", + "uncompress": true, + "rename-folder": "hansen_many_networks-1.0.0", + "rel-path": "ds-hansen_manynetworks" } } diff --git a/netneurotools/datasets/datasets_utils.py b/netneurotools/datasets/datasets_utils.py index 82bd228..5ab90cd 100644 --- a/netneurotools/datasets/datasets_utils.py +++ b/netneurotools/datasets/datasets_utils.py @@ -2,14 +2,25 @@ import json import os +import shutil +from pathlib import Path from collections import namedtuple import importlib.resources -SURFACE = namedtuple('Surface', ('lh', 'rh')) +try: + # nilearn 0.10.3 + from nilearn.datasets._utils import fetch_single_file as _fetch_file +except ImportError: + from nilearn.datasets.utils import _fetch_file + + +SURFACE = namedtuple("Surface", ("lh", "rh")) FREESURFER_IGNORE = [ - 'unknown', 'corpuscallosum', 'Background+FreeSurfer_Defined_Medial_Wall' + "unknown", + "corpuscallosum", + "Background+FreeSurfer_Defined_Medial_Wall", ] @@ -30,15 +41,13 @@ def _get_data_dir(data_dir=None): Path to use as data directory """ if data_dir is None: - data_dir = os.environ.get('NNT_DATA', os.path.join('~', 'nnt-data')) - data_dir = os.path.expanduser(data_dir) - if not os.path.exists(data_dir): - os.makedirs(data_dir) - + data_dir = os.environ.get("NNT_DATA", str(Path.home() / "nnt-data")) + data_dir = Path(data_dir).expanduser() + data_dir.mkdir(parents=True, exist_ok=True) return data_dir -def _decode_urls(data): +def _decode_url(url_type, url): """ Format `data` object with OSF API URL. 
@@ -55,20 +64,105 @@ def _decode_urls(data): OSF_API = "https://files.osf.io/v1/resources/{}/providers/osfstorage/{}" GITHUB_RELEASE_API = "https://github.com/{}/{}/archive/refs/tags/{}.tar.gz" - if isinstance(data, str) or isinstance(data, list): - return data - elif 'url' in data: - if data['url-type'] == 'osf': - data['url'] = OSF_API.format(*data['url']) - elif data['url-type'] == 'github-release': - data['url'] = GITHUB_RELEASE_API.format(*data['url']) - else: - raise ValueError("URL type {} not recognized".format(data['url-type'])) + if url_type == "osf": + out_url = OSF_API.format(*url) + elif url_type == "github-release": + out_url = GITHUB_RELEASE_API.format(*url) + else: + raise ValueError("URL type {} not recognized".format(url_type)) - for key, value in data.items(): - data[key] = _decode_urls(value) + return out_url - return data + +def fetch_file(dataset_name, keys=None, force=False, data_dir=None, verbose=1): + """ + Fetch file(s) for dataset `dataset_name`. + + Parameters + ---------- + dataset_name : str + Name of dataset to fetch + keys : str or list, optional + If provided, will only fetch the specified key(s) from the dataset. + Default: None + force : bool, optional + If True, will overwrite existing dataset. Default: False + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + verbose : int, optional + Verbosity level. Default: 1 + """ + data_dir = _get_data_dir(data_dir=data_dir) + info = _get_dataset_info(dataset_name) + + # deal with nested keys + if keys is not None: + if isinstance(keys, str): + keys = [keys] + for k in keys: + info = info[k] + + if "uncompress" in info and info["uncompress"]: + targ_folder = data_dir / info["rel-path"] + + # check if folder exists and remove if force=True + if targ_folder.exists(): + if force: + shutil.rmtree(targ_folder) + if verbose: + print(f"Dataset {dataset_name} already exists. Overwriting.") + else: + if verbose: + print(f"Dataset {dataset_name} already exists. Skipping download.") + return targ_folder + + # download compressed file + dl_fname = _fetch_file( + _decode_url(info["url-type"], info["url"]), + data_dir, + resume=True, + md5sum=info["md5"], + verbose=verbose, + ) + + # extract contents and remove compressed file + shutil.unpack_archive(dl_fname, targ_folder.parent, format="gztar") + os.remove(dl_fname) + + # rename folder if necessary + if "rename-folder" in info: + shutil.move(targ_folder.parent / info["rename-folder"], targ_folder) + if verbose: + print(f"Downloaded {dataset_name} to {data_dir}") + return targ_folder + else: + targ_file = data_dir / dataset_name / info["file-name"] + + # check if file exists and remove if force=True + if targ_file.exists(): + if force: + os.remove(targ_file) + if verbose: + print(f"Dataset {dataset_name} already exists. Overwriting.") + else: + if verbose: + print(f"Dataset {dataset_name} already exists. 
Skipping download.") + return targ_file + # download file + dl_fname = _fetch_file( + _decode_url(info["url-type"], info["url"]), + data_dir / dataset_name, + resume=True, + md5sum=info["md5"], + verbose=verbose, + ) + # move/rename file + shutil.move(dl_fname, targ_file) + if verbose: + print(f"Downloaded {dataset_name} to {data_dir}") + return targ_file def _load_resource_json(relative_path): @@ -86,11 +180,12 @@ def _load_resource_json(relative_path): JSON file loaded as a dictionary """ # handling pkg_resources.resource_filename deprecation - if getattr(importlib.resources, 'files', None) is not None: + if getattr(importlib.resources, "files", None) is not None: f_resource = importlib.resources.files("netneurotools") / relative_path else: from pkg_resources import resource_filename - f_resource = resource_filename('netneurotools', relative_path) + + f_resource = resource_filename("netneurotools", relative_path) with open(f_resource) as src: resource_json = json.load(src) @@ -98,8 +193,8 @@ def _load_resource_json(relative_path): return resource_json -NNT_DATASETS = _load_resource_json('datasets/datasets.json') -NNT_DATASETS = _decode_urls(NNT_DATASETS) +NNT_DATASETS = _load_resource_json("datasets/datasets.json") +# NNT_DATASETS = _decode_urls(NNT_DATASETS) def _get_dataset_info(name): @@ -127,7 +222,7 @@ def _get_dataset_info(name): ) from None -NNT_REFERENCES = _load_resource_json('datasets/references.json') +NNT_REFERENCES = _load_resource_json("datasets/references.json") def _get_reference_info(name, verbose=1, return_dict=False): @@ -184,19 +279,24 @@ def _fill_reference_json(bib_file, json_file, overwrite=False, use_defaults=Fals None """ if use_defaults: - bib_file = \ + bib_file = ( importlib.resources.files("netneurotools") / "datasets/netneurotools.bib" - json_file = \ + ) + json_file = ( importlib.resources.files("netneurotools") / "datasets/references.json" + ) from pybtex import PybtexEngine + engine = PybtexEngine() def _get_citation(key): s = engine.format_from_file( - filename=bib_file, style="unsrt", - citations=[key], output_backend="plaintext" - ) + filename=bib_file, + style="unsrt", + citations=[key], + output_backend="plaintext", + ) return s.strip("\n").replace("[1] ", "") with open(json_file) as src: @@ -239,17 +339,17 @@ def _check_freesurfer_subjid(subject_id, subjects_dir=None): # check inputs for subjects_dir and subject_id if subjects_dir is None or not os.path.isdir(subjects_dir): try: - subjects_dir = os.environ['SUBJECTS_DIR'] + subjects_dir = Path(os.environ["SUBJECTS_DIR"]) except KeyError: - subjects_dir = os.getcwd() + subjects_dir = Path.cwd() else: - subjects_dir = os.path.abspath(subjects_dir) + subjects_dir = Path(subjects_dir).resolve() - subjdir = os.path.join(subjects_dir, subject_id) - if not os.path.isdir(subjdir): + subjdir = subjects_dir / subject_id + if not subjdir.is_dir(): raise FileNotFoundError( - f'Cannot find specified subject id {subject_id} in ' - f'provided subject directory {subjects_dir}.' + f"Cannot find specified subject id {subject_id} in " + f"provided subject directory {subjects_dir}." 
) return subject_id, subjects_dir @@ -278,14 +378,15 @@ def _get_freesurfer_subjid(subject_id, subjects_dir=None): try: subject_id, subjects_dir = _check_freesurfer_subjid(subject_id, subjects_dir) except FileNotFoundError: - if 'fsaverage' not in subject_id: + if "fsaverage" not in subject_id: raise ValueError( - f'Provided subject {subject_id} does not exist in provided ' - f'subjects_dir {subjects_dir}' + f"Provided subject {subject_id} does not exist in provided " + f"subjects_dir {subjects_dir}" ) from None from .fetch_template import fetch_fsaverage + fetch_fsaverage(subject_id) - subjects_dir = os.path.join(_get_data_dir(), 'tpl-fsaverage') + subjects_dir = os.path.join(_get_data_dir(), "tpl-fsaverage") subject_id, subjects_dir = _check_freesurfer_subjid(subject_id, subjects_dir) return subject_id, subjects_dir diff --git a/netneurotools/datasets/fetch_atlas.py b/netneurotools/datasets/fetch_atlas.py index 5df542c..921c9a1 100644 --- a/netneurotools/datasets/fetch_atlas.py +++ b/netneurotools/datasets/fetch_atlas.py @@ -1,25 +1,16 @@ """Functions for fetching atlas data.""" + import itertools -import warnings -try: - # nilearn 0.10.3 - from nilearn.datasets._utils import fetch_files -except ImportError: - from nilearn.datasets.utils import _fetch_files as fetch_files from sklearn.utils import Bunch -from .datasets_utils import ( - SURFACE, - _get_data_dir, _get_dataset_info, _get_reference_info -) +from .datasets_utils import SURFACE, _get_reference_info, fetch_file def fetch_cammoun2012( - version='MNI152NLin2009aSym', - data_dir=None, resume=True, verbose=1 - ): + version="MNI152NLin2009aSym", force=False, data_dir=None, verbose=1 +): """ Download files for Cammoun et al., 2012 multiscale parcellation. @@ -47,12 +38,12 @@ def fetch_cammoun2012( Other Parameters ---------------- + force : bool, optional + If True, will overwrite existing dataset. Default: False data_dir : str, optional Path to use as data directory. If not specified, will check for environmental variable 'NNT_DATA'; if that is not set, will use `~/nnt-data` instead. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: True verbose : int, optional Modifies verbosity of download, where higher numbers mean more updates. Default: 1 @@ -69,95 +60,81 @@ def fetch_cammoun2012( spectrum mri. Journal of neuroscience methods, 203(2):386\u2013397, 2012. """ - if version == 'surface': - warnings.warn('Providing `version="surface"` is deprecated and will ' - 'be removed in a future release. For consistent ' - 'behavior please use `version="fsaverage"` instead.', - DeprecationWarning, stacklevel=2) - version = 'fsaverage' - elif version == 'volume': - warnings.warn('Providing `version="volume"` is deprecated and will ' - 'be removed in a future release. For consistent ' - 'behavior please use `version="MNI152NLin2009aSym"` ' - 'instead.', - DeprecationWarning, stacklevel=2) - version = 'MNI152NLin2009aSym' - versions = [ - 'gcs', 'fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k', - 'MNI152NLin2009aSym' + "gcs", + "fsaverage", + "fsaverage5", + "fsaverage6", + "fslr32k", + "MNI152NLin2009aSym", ] if version not in versions: raise ValueError( - f'The version of Cammoun et al., 2012 parcellation ' - f'requested {version} does not exist. Must be one of {versions}' + f"The version of Cammoun et al., 2012 parcellation " + f"requested {version} does not exist. 
Must be one of {versions}" ) - dataset_name = 'atl-cammoun2012' + dataset_name = "atl-cammoun2012" _get_reference_info(dataset_name, verbose=verbose) - keys = ['scale033', 'scale060', 'scale125', 'scale250', 'scale500'] + keys = ["scale033", "scale060", "scale125", "scale250", "scale500"] - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name)[version] - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': f'{dataset_name}.tar.gz' - } + fetched = fetch_file( + dataset_name, keys=version, force=force, data_dir=data_dir, verbose=verbose + ) - # filenames differ based on selected version of dataset - if version == 'MNI152NLin2009aSym': - _filenames = [ - f'{dataset_name}/{version}/' - f'atl-Cammoun2012_space-MNI152NLin2009aSym_res-{res[-3:]}' - f'_deterministic{suff}' - for res in keys for suff in ['.nii.gz'] - ] + [ - f'{dataset_name}/{version}/' - f'atl-Cammoun2012_space-MNI152NLin2009aSym_info.csv' - ] - elif version == 'fslr32k': - _filenames = [ - f'{dataset_name}/{version}/' - f'atl-Cammoun2012_space-fslr32k_res-{res[-3:]}_hemi-{hemi}' - f'_deterministic{suff}' - for res in keys for hemi in ['L', 'R'] for suff in ['.label.gii'] - ] - elif version in ('fsaverage', 'fsaverage5', 'fsaverage6'): - _filenames = [ - f'{dataset_name}/{version}/' - f'atl-Cammoun2012_space-{version}_res-{res[-3:]}_hemi-{hemi}' - f'_deterministic{suff}' - for res in keys for hemi in ['L', 'R'] for suff in ['.annot'] - ] - else: - _filenames = [ - f'{dataset_name}/{version}/' - f'atl-Cammoun2012_res-{res[5:]}_hemi-{hemi}' - f'_probabilistic{suff}' - for res in keys[:-1] + ['scale500v1', 'scale500v2', 'scale500v3'] - for hemi in ['L', 'R'] for suff in ['.gcs', '.ctab'] - ] - _files = [(f, info['url'], opts) for f in _filenames] - data = fetch_files(data_dir, files=_files, resume=resume, verbose=verbose) - - if version == 'MNI152NLin2009aSym': - keys += ['info'] - elif version in ('fslr32k', 'fsaverage', 'fsaverage5', 'fsaverage6'): - data = [SURFACE(*data[i:i + 2]) for i in range(0, len(data), 2)] + if version == "MNI152NLin2009aSym": + _fname = "atl-Cammoun2012_space-MNI152NLin2009aSym_res-{}_deterministic.nii.gz" + data = { + k: fetched + / _fname.format(k[-3:]) + for k in keys + } + data["info"] = fetched / "atl-Cammoun2012_space-MNI152NLin2009aSym_info.csv" + elif version == "fslr32k": + _fname = "atl-Cammoun2012_space-fslr32k_res-{}_hemi-{}_deterministic.label.gii" + data = { + k: SURFACE( + fetched / _fname.format(k[-3:], "L"), + fetched / _fname.format(k[-3:], "R") + ) + for k in keys + } + elif version in ("fsaverage", "fsaverage5", "fsaverage6"): + _fname = "atl-Cammoun2012_space-{}_res-{}_hemi-{}_deterministic.annot" + data = { + k: SURFACE( + fetched / _fname.format(version, k[-3:], "L"), + fetched / _fname.format(version, k[-3:], "R") + ) + for k in keys + } else: - data = [data[::2][i:i + 2] for i in range(0, len(data) // 2, 2)] - # deal with the fact that last scale is split into three files :sigh: - data = data[:-3] + [list(itertools.chain.from_iterable(data[-3:]))] + data = { + k: [ + fetched / f"atl-Cammoun2012_res-{k[5:]}_hemi-L_probabilistic.gcs", + fetched / f"atl-Cammoun2012_res-{k[5:]}_hemi-R_probabilistic.gcs", + ] + for k in keys[:-1] + } + data[keys[-1]] = list( + itertools.chain.from_iterable( + [ + [ + fetched + / f"atl-Cammoun2012_res-{k[5:]}_hemi-L_probabilistic.gcs", + fetched + / f"atl-Cammoun2012_res-{k[5:]}_hemi-R_probabilistic.gcs", + ] + for k in ["scale500v1", "scale500v2", "scale500v3"] + ] + ) + ) - return Bunch(**dict(zip(keys, 
data))) + return Bunch(**data) -def fetch_schaefer2018( - version='fsaverage', - data_dir=None, resume=True, verbose=1 - ): +def fetch_schaefer2018(version="fsaverage", force=False, data_dir=None, verbose=1): """ Download FreeSurfer .annot files for Schaefer et al., 2018 parcellation. @@ -179,12 +156,12 @@ def fetch_schaefer2018( Other Parameters ---------------- + force : bool, optional + If True, will overwrite existing dataset. Default: False data_dir : str, optional Path to use as data directory. If not specified, will check for environmental variable 'NNT_DATA'; if that is not set, will use `~/nnt-data` instead. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: True verbose : int, optional Modifies verbosity of download, where higher numbers mean more updates. Default: 1 @@ -201,55 +178,42 @@ def fetch_schaefer2018( functional connectivity mri. Cerebral cortex, 28(9):3095\u20133114, 2018. """ - versions = ['fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k'] + versions = ["fsaverage", "fsaverage5", "fsaverage6", "fslr32k"] if version not in versions: raise ValueError( - f'The version of Schaefer et al., 2018 parcellation ' + f"The version of Schaefer et al., 2018 parcellation " f'requested "{version}" does not exist. Must be one of {versions}' ) - dataset_name = 'atl-schaefer2018' + dataset_name = "atl-schaefer2018" _get_reference_info(dataset_name, verbose=verbose) - keys = [ - f'{p}Parcels{n}Networks' - for p in range(100, 1001, 100) for n in [7, 17] - ] + keys = [f"{p}Parcels{n}Networks" for p in range(100, 1001, 100) for n in [7, 17]] - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name)[version] - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': f'{dataset_name}.tar.gz' - } + fetched = fetch_file( + dataset_name, keys=version, force=force, data_dir=data_dir, verbose=verbose + ) - if version == 'fslr32k': - hemispheres, suffix = ['LR'], 'dlabel.nii' + if version == "fslr32k": + _fname = "atl-Schaefer2018_space-{}_hemi-{}_desc-{}_deterministic.dlabel.nii" + data = { + k: fetched / _fname.format(version, "LR", k) + for k in keys + } else: - hemispheres, suffix = ['L', 'R'], 'annot' - - _filenames = [ - f'{dataset_name}/{version}/' - f'atl-Schaefer2018_space-{version}_hemi-{hemi}_desc-{desc}' - f'_deterministic.{suffix}' - for desc in keys for hemi in hemispheres - ] - - _files = [(f, info['url'], opts) for f in _filenames] - - data = fetch_files(data_dir, files=_files, resume=resume, verbose=verbose) - - if suffix == 'annot': - data = [SURFACE(*data[i:i + 2]) for i in range(0, len(keys) * 2, 2)] + _fname = "atl-Schaefer2018_space-{}_hemi-{}_desc-{}_deterministic.annot" + data = { + k: SURFACE( + fetched / _fname.format(version, "L", k), + fetched / _fname.format(version, "R", k) + ) + for k in keys + } - return Bunch(**dict(zip(keys, data))) + return Bunch(**data) -def fetch_mmpall( - version='fslr32k', - data_dir=None, resume=True, verbose=1 - ): +def fetch_mmpall(version="fslr32k", force=False, data_dir=None, verbose=1): """ Download .label.gii files for Glasser et al., 2016 MMPAll atlas. @@ -271,12 +235,12 @@ def fetch_mmpall( Other Parameters ---------------- + force : bool, optional + If True, will overwrite existing dataset. Default: False data_dir : str, optional Path to use as data directory. If not specified, will check for environmental variable 'NNT_DATA'; if that is not set, will use `~/nnt-data` instead. 
Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: True verbose : int, optional Modifies verbosity of download, where higher numbers mean more updates. Default: 1 @@ -292,37 +256,27 @@ def fetch_mmpall( Beckmann, Mark Jenkinson, and others. A multi-modal parcellation of human cerebral cortex. Nature, 536(7615):171\u2013178, 2016. """ - versions = ['fslr32k'] + versions = ["fslr32k"] if version not in versions: raise ValueError( - f'The version of Glasser et al., 2016 parcellation ' + f"The version of Glasser et al., 2016 parcellation " f'requested "{version}" does not exist. Must be one of {versions}' ) - dataset_name = 'atl-mmpall' + dataset_name = "atl-mmpall" _get_reference_info(dataset_name, verbose=verbose) - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name)[version] - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': f'{dataset_name}.tar.gz' - } - - _filenames = [ - f'{dataset_name}/{version}/' - f'atl-MMPAll_space-{version}_hemi-{hemi}_deterministic.label.gii' - for hemi in ['L', 'R'] - ] - _files = [(f, info['url'], opts) for f in _filenames] - - data = fetch_files(data_dir, files=_files, resume=resume, verbose=verbose) + fetched = fetch_file( + dataset_name, keys=version, force=force, data_dir=data_dir, verbose=verbose + ) - return SURFACE(*data) + return SURFACE( + fetched / f"atl-MMPAll_space-{version}_hemi-L_deterministic.label.gii", + fetched / f"atl-MMPAll_space-{version}_hemi-R_deterministic.label.gii", + ) -def fetch_pauli2018(data_dir=None, resume=True, verbose=1): +def fetch_pauli2018(force=False, data_dir=None, verbose=1): """ Download files for Pauli et al., 2018 subcortical parcellation. @@ -338,12 +292,12 @@ def fetch_pauli2018(data_dir=None, resume=True, verbose=1): Other Parameters ---------------- + force : bool, optional + If True, will overwrite existing dataset. Default: False data_dir : str, optional Path to use as data directory. If not specified, will check for environmental variable 'NNT_DATA'; if that is not set, will use `~/nnt-data` instead. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: True verbose : int, optional Modifies verbosity of download, where higher numbers mean more updates. Default: 1 @@ -358,29 +312,20 @@ def fetch_pauli2018(data_dir=None, resume=True, verbose=1): high-resolution probabilistic in vivo atlas of human subcortical brain nuclei. Scientific data, 5(1):1\u201313, 2018. 
""" - dataset_name = 'atl-pauli2018' + dataset_name = "atl-pauli2018" _get_reference_info(dataset_name, verbose=verbose) - keys = ['probabilistic', 'deterministic', 'info'] + fetched = fetch_file(dataset_name, force=force, data_dir=data_dir, verbose=verbose) - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name) - - _files = [] - for _, v in info.items(): - _f = f'{v["folder-name"]}/{v["file-name"]}' - _url = v['url'] - _opts = { - 'md5sum': v['md5'], - 'move': f'{v["folder-name"]}/{v["file-name"]}' - } - _files.append( - (_f, _url, _opts) - ) - - data = fetch_files(data_dir, files=_files, resume=resume, verbose=verbose) + data = { + "probabilistic": fetched + / "atl-pauli2018_space-MNI152NLin2009cAsym_hemi-both_probabilistic.nii.gz", + "deterministic": fetched + / "atl-pauli2018_space-MNI152NLin2009cAsym_hemi-both_deterministic.nii.gz", + "info": fetched / "atl-pauli2018_space-MNI152NLin2009cAsym_info.csv", + } - return Bunch(**dict(zip(keys, data))) + return Bunch(**data) def fetch_ye2020(): @@ -388,7 +333,7 @@ def fetch_ye2020(): pass -def fetch_voneconomo(data_dir=None, url=None, resume=True, verbose=1): +def fetch_voneconomo(force=False, data_dir=None, verbose=1): """ Fetch von-Economo Koskinas probabilistic FreeSurfer atlas. @@ -403,12 +348,12 @@ def fetch_voneconomo(data_dir=None, url=None, resume=True, verbose=1): Other Parameters ---------------- + force : bool, optional + If True, will overwrite existing dataset. Default: False data_dir : str, optional Path to use as data directory. If not specified, will check for environmental variable 'NNT_DATA'; if that is not set, will use `~/nnt-data` instead. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: True verbose : int, optional Modifies verbosity of download, where higher numbers mean more updates. Default: 1 @@ -423,29 +368,21 @@ def fetch_voneconomo(data_dir=None, url=None, resume=True, verbose=1): Schmidt, and Martijn P van den Heuvel. An mri von economo\u2013koskinas atlas. NeuroImage, 170:249\u2013256, 2018. 
""" - dataset_name = 'atl-voneconomo_koskinas' + dataset_name = "atl-voneconomo_koskinas" _get_reference_info(dataset_name, verbose=verbose) - keys = ['gcs', 'ctab', 'info'] - - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name) - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': f'{dataset_name}.tar.gz' + fetched = fetch_file(dataset_name, force=force, data_dir=data_dir, verbose=verbose) + + data = { + "gcs": SURFACE( + fetched / "atl-vonEconomoKoskinas_hemi-L_probabilistic.gcs", + fetched / "atl-vonEconomoKoskinas_hemi-R_probabilistic.gcs", + ), + "ctab": SURFACE( + fetched / "atl-vonEconomoKoskinas_hemi-L_probabilistic.ctab", + fetched / "atl-vonEconomoKoskinas_hemi-R_probabilistic.ctab", + ), + "info": fetched / "atl-vonEconomoKoskinas_info.csv", } - _filenames = [ - f'{dataset_name}/' - f'atl-vonEconomoKoskinas_hemi-{hemi}_probabilistic.{suff}' - for hemi in ['L', 'R'] for suff in ['gcs', 'ctab'] - ] + [ - f'{dataset_name}/atl-vonEconomoKoskinas_info.csv' - ] - _files = [(f, info['url'], opts) for f in _filenames] - data = fetch_files(data_dir, files=_files, resume=resume, verbose=verbose) - - data = [SURFACE(*data[:-1:2])] + [SURFACE(*data[1:-1:2])] + [data[-1]] - - return Bunch(**dict(zip(keys, data))) + return Bunch(**data) diff --git a/netneurotools/datasets/fetch_project.py b/netneurotools/datasets/fetch_project.py index ce3def1..4329728 100644 --- a/netneurotools/datasets/fetch_project.py +++ b/netneurotools/datasets/fetch_project.py @@ -1,24 +1,16 @@ """Functions for fetching project data.""" + import os -from pathlib import Path import numpy as np -try: - # nilearn 0.10.3 - from nilearn.datasets._utils import fetch_files -except ImportError: - from nilearn.datasets.utils import _fetch_files as fetch_files - from sklearn.utils import Bunch -from .datasets_utils import ( - _get_data_dir, _get_dataset_info, _get_reference_info -) +from .datasets_utils import _get_data_dir, _get_reference_info, fetch_file from ._mirchi2018 import _get_fc, _get_panas -def fetch_vazquez_rodriguez2019(data_dir=None, resume=True, verbose=1): +def fetch_vazquez_rodriguez2019(force=False, data_dir=None, verbose=1): """ Download files from Vazquez-Rodriguez et al., 2019, PNAS. @@ -34,12 +26,12 @@ def fetch_vazquez_rodriguez2019(data_dir=None, resume=True, verbose=1): Other Parameters ---------------- + force : bool, optional + If True, will overwrite existing dataset. Default: False data_dir : str, optional Path to use as data directory. If not specified, will check for environmental variable 'NNT_DATA'; if that is not set, will use `~/nnt-data` instead. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: True verbose : int, optional Modifies verbosity of download, where higher numbers mean more updates. Default: 1 @@ -53,37 +45,21 @@ def fetch_vazquez_rodriguez2019(data_dir=None, resume=True, verbose=1): Proceedings of the National Academy of Sciences, 116(42):21219\u201321227, 2019. 
""" - dataset_name = 'ds-vazquez_rodriguez2019' + dataset_name = "ds-vazquez_rodriguez2019" _get_reference_info(dataset_name, verbose=verbose) - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name) - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': f'{dataset_name}.tar.gz' - } - fetched = fetch_files( - data_dir, - files=[(dataset_name, info['url'], opts)], - resume=resume, verbose=verbose - ) - fetched = Path(fetched[0]) + fetched = fetch_file(dataset_name, force=force, data_dir=data_dir, verbose=verbose) # load data rsq, grad = np.loadtxt( - fetched / "rsquared_gradient.csv", - delimiter=',', skiprows=1 + fetched / "rsquared_gradient.csv", delimiter=",", skiprows=1 ).T - data = { - 'rsquared': rsq, - 'gradient': grad - } + data = {"rsquared": rsq, "gradient": grad} return Bunch(**data) -def fetch_mirchi2018(data_dir=None, resume=True, verbose=1): +def fetch_mirchi2018(force=False, data_dir=None, verbose=1): """ Download (and creates) dataset for replicating Mirchi et al., 2018, SCAN. @@ -102,32 +78,39 @@ def fetch_mirchi2018(data_dir=None, resume=True, verbose=1): Y : (73, 13) numpy.ndarray PANAS subscales from MyConnectome behavioral data """ - data_dir = os.path.join(_get_data_dir(data_dir=data_dir), 'ds-mirchi2018') - os.makedirs(data_dir, exist_ok=True) + data_dir = _get_data_dir(data_dir=data_dir) / "ds-mirchi2018" + data_dir.mkdir(exist_ok=True, parents=True) - X_fname = os.path.join(data_dir, 'myconnectome_fc.npy') - Y_fname = os.path.join(data_dir, 'myconnectome_panas.csv') + X_fname = data_dir / "myconnectome_fc.npy" + Y_fname = data_dir / "myconnectome_panas.csv" if not os.path.exists(X_fname): - X = _get_fc(data_dir=data_dir, resume=resume, verbose=verbose) + X = _get_fc(verbose=verbose) np.save(X_fname, X, allow_pickle=False) else: X = np.load(X_fname, allow_pickle=False) if not os.path.exists(Y_fname): - Y = _get_panas(data_dir=data_dir, resume=resume, verbose=verbose) - np.savetxt(Y_fname, np.column_stack(list(Y.values())), - header=','.join(Y.keys()), delimiter=',', fmt='%i') + Y = _get_panas() + np.savetxt( + Y_fname, + np.column_stack(list(Y.values())), + header=",".join(Y.keys()), + delimiter=",", + fmt="%i", + ) # convert dictionary to structured array before returning - Y = np.array([tuple(row) for row in np.column_stack(list(Y.values()))], - dtype=dict(names=list(Y.keys()), formats=['i8'] * len(Y))) + Y = np.array( + [tuple(row) for row in np.column_stack(list(Y.values()))], + dtype=dict(names=list(Y.keys()), formats=["i8"] * len(Y)), + ) else: - Y = np.genfromtxt(Y_fname, delimiter=',', names=True, dtype=int) + Y = np.genfromtxt(Y_fname, delimiter=",", names=True, dtype=int) return X, Y -def fetch_hansen_manynetworks(data_dir=None, resume=True, verbose=1): +def fetch_hansen_manynetworks(force=False, data_dir=None, verbose=1): """ Download files from Hansen et al., 2023, PLOS Biology. @@ -142,12 +125,12 @@ def fetch_hansen_manynetworks(data_dir=None, resume=True, verbose=1): Other Parameters ---------------- + force : bool, optional + If True, will overwrite existing dataset. Default: False data_dir : str, optional Path to use as data directory. If not specified, will check for environmental variable 'NNT_DATA'; if that is not set, will use `~/nnt-data` instead. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: True verbose : int, optional Modifies verbosity of download, where higher numbers mean more updates. 
Default: 1 @@ -156,25 +139,10 @@ def fetch_hansen_manynetworks(data_dir=None, resume=True, verbose=1): ---------- .. [1] """ - dataset_name = 'ds-hansen_manynetworks' + dataset_name = "ds-hansen_manynetworks" _get_reference_info(dataset_name, verbose=verbose) - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name) - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': f'{dataset_name}/{dataset_name}.tar.gz' - } - # the download info["folder-name"].tar.gz was moved to - # {dataset_name}/{dataset_name}.tar.gz and uncompressed - # to keep the same structure as other datasets - fetched = fetch_files( - data_dir, - files=[(f'{dataset_name}/{info["folder-name"]}', info['url'], opts)], - resume=resume, verbose=verbose - ) - fetched = Path(fetched[0]) + fetched = fetch_file(dataset_name, force=force, data_dir=data_dir, verbose=verbose) # load data data = { @@ -187,7 +155,7 @@ def fetch_hansen_manynetworks(data_dir=None, resume=True, verbose=1): }, "schaefer400": { "gene": fetched / "data/Schaefer400/gene_coexpression.npy", - } + }, } return Bunch(**data) @@ -218,10 +186,7 @@ def fetch_suarez_mami(): pass -def fetch_famous_gmat( - dataset, - data_dir=None, resume=True, verbose=1 - ): +def fetch_famous_gmat(dataset, force=False, data_dir=None, verbose=1): """ Download files from multi-species connectomes. @@ -248,12 +213,12 @@ def fetch_famous_gmat( Other Parameters ---------------- + force : bool, optional + If True, will overwrite existing dataset. Default: False data_dir : str, optional Path to use as data directory. If not specified, will check for environmental variable 'NNT_DATA'; if that is not set, will use `~/nnt-data` instead. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: True verbose : int, optional Modifies verbosity of download, where higher numbers mean more updates. Default: 1 @@ -290,53 +255,43 @@ def fetch_famous_gmat( 112(16):E2093\u2013E2101, 2015. 
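A minimal usage sketch for fetch_famous_gmat (not part of the diff; it assumes the default data directory and network access). Each CSV shipped with the requested connectome is loaded with np.loadtxt and exposed in the returned Bunch under its file stem, falling back to a string array when the file is not numeric:

    from netneurotools import datasets

    gmat = datasets.fetch_famous_gmat("celegans", verbose=0)
    print(sorted(gmat.keys()))  # one key per CSV file in the 'celegans' folder
    # numeric CSVs are returned as float arrays; non-numeric ones as dtype=str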
""" available_connectomes = [ - 'celegans', - 'drosophila', - 'human_func_scale033', - 'human_func_scale060', - 'human_func_scale125', - 'human_func_scale250', - 'human_func_scale500', - 'human_struct_scale033', - 'human_struct_scale060', - 'human_struct_scale125', - 'human_struct_scale250', - 'human_struct_scale500', - 'macaque_markov', - 'macaque_modha', - 'mouse', - 'rat' + "celegans", + "drosophila", + "human_func_scale033", + "human_func_scale060", + "human_func_scale125", + "human_func_scale250", + "human_func_scale500", + "human_struct_scale033", + "human_struct_scale060", + "human_struct_scale125", + "human_struct_scale250", + "human_struct_scale500", + "macaque_markov", + "macaque_modha", + "mouse", + "rat", ] if dataset not in available_connectomes: raise ValueError( - f'Provided dataset {dataset} not available; ' - f'must be one of {available_connectomes}' + f"Provided dataset {dataset} not available; " + f"must be one of {available_connectomes}" ) - base_dataset_name = 'ds-famous_gmat' + base_dataset_name = "ds-famous_gmat" _get_reference_info(base_dataset_name, verbose=verbose) - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(base_dataset_name) - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': f'{base_dataset_name}.tar.gz' - } - fetched = fetch_files( - data_dir, - files=[(base_dataset_name, info['url'], opts)], - resume=resume, verbose=verbose + fetched = fetch_file( + base_dataset_name, force=force, data_dir=data_dir, verbose=verbose ) - fetched = Path(fetched[0]) data = {} for f in (fetched / dataset).glob("*.csv"): try: - data[f.stem] = np.loadtxt(f, delimiter=',') + data[f.stem] = np.loadtxt(f, delimiter=",") except ValueError: - data[f.stem] = np.loadtxt(f, delimiter=',', dtype=str) + data[f.stem] = np.loadtxt(f, delimiter=",", dtype=str) return Bunch(**data) diff --git a/netneurotools/datasets/fetch_template.py b/netneurotools/datasets/fetch_template.py index fca331e..24bb7d4 100644 --- a/netneurotools/datasets/fetch_template.py +++ b/netneurotools/datasets/fetch_template.py @@ -1,28 +1,20 @@ """Functions for fetching template data.""" - import json -from pathlib import Path -import os.path as op - -try: - # nilearn 0.10.3 - from nilearn.datasets._utils import fetch_files -except ImportError: - from nilearn.datasets.utils import _fetch_files as fetch_files from sklearn.utils import Bunch from .datasets_utils import ( SURFACE, - _get_data_dir, _get_dataset_info, _get_reference_info, _check_freesurfer_subjid + _get_reference_info, + _check_freesurfer_subjid, + fetch_file, ) def fetch_fsaverage( - version='fsaverage', - data_dir=None, resume=True, verbose=1 - ): + version="fsaverage", use_local=False, force=False, data_dir=None, verbose=1 +): """ Download files for fsaverage FreeSurfer template. @@ -35,6 +27,8 @@ def fetch_fsaverage( version : str, optional One of {'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', 'fsaverage6'}. Default: 'fsaverage' + use_local : bool, optional + If True, will attempt to use local FreeSurfer data. Default: False Returns ------- @@ -45,12 +39,12 @@ def fetch_fsaverage( Other Parameters ---------------- + force : bool, optional + If True, will overwrite existing dataset. Default: False data_dir : str, optional Path to use as data directory. If not specified, will check for environmental variable 'NNT_DATA'; if that is not set, will use `~/nnt-data` instead. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. 
Default: True verbose : int, optional Modifies verbosity of download, where higher numbers mean more updates. Default: 1 @@ -67,48 +61,50 @@ def fetch_fsaverage( High-resolution intersubject averaging and a coordinate system for the cortical surface. Human brain mapping, 8(4):272\u2013284, 1999. """ - versions = [ - 'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', 'fsaverage6' - ] + versions = ["fsaverage", "fsaverage3", "fsaverage4", "fsaverage5", "fsaverage6"] if version not in versions: raise ValueError( - f'The version of fsaverage requested {version} does not ' - f'exist. Must be one of {versions}' + f"The version of fsaverage requested {version} does not " + f"exist. Must be one of {versions}" ) - dataset_name = 'tpl-fsaverage' + dataset_name = "tpl-fsaverage" _get_reference_info(dataset_name, verbose=verbose) - keys = ['orig', 'white', 'smoothwm', 'pial', 'inflated', 'sphere'] - - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name)[version] - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': f'{dataset_name}.tar.gz' - } - - _filenames = [ - f"{version}/surf/{hemi}.{surf}" - for surf in keys for hemi in ['lh', 'rh'] - ] - - try: - # use local FreeSurfer data if available - data_dir = _check_freesurfer_subjid(version)[1] - data = [op.join(data_dir, f) for f in _filenames] - except FileNotFoundError: - _filenames = [f"{dataset_name}/{_}" for _ in _filenames] - _files = [(f, info['url'], opts) for f in _filenames] - data = fetch_files(data_dir, files=_files, resume=resume, verbose=verbose) + keys = ["orig", "white", "smoothwm", "pial", "inflated", "sphere"] + + if use_local: + try: + data_dir = _check_freesurfer_subjid(version)[1] + data = { + k: SURFACE( + data_dir / f"{version}/surf/lh.{k}", + data_dir / f"{version}/surf/rh.{k}", + ) + for k in keys + } + except FileNotFoundError: + raise FileNotFoundError( + f"Local FreeSurfer data for {version} not found. " + "Please ensure FreeSurfer is installed and properly set up." + ) from None + else: + fetched = fetch_file( + dataset_name, keys=version, force=force, data_dir=data_dir, verbose=verbose + ) - data = [SURFACE(*data[i:i + 2]) for i in range(0, len(keys) * 2, 2)] + data = { + k: SURFACE( + fetched / f"surf/lh.{k}", + fetched / f"surf/rh.{k}", + ) + for k in keys + } - return Bunch(**dict(zip(keys, data))) + return Bunch(**data) -def fetch_hcp_standards(data_dir=None, resume=True, verbose=1): +def fetch_hcp_standards(force=False, data_dir=None, verbose=1): """ Fetch HCP standard mesh atlases for converting between FreeSurfer and HCP. @@ -126,12 +122,12 @@ def fetch_hcp_standards(data_dir=None, resume=True, verbose=1): Other Parameters ---------------- + force : bool, optional + If True, will overwrite existing dataset. Default: False data_dir : str, optional Path to use as data directory. If not specified, will check for environmental variable 'NNT_DATA'; if that is not set, will use `~/nnt-data` instead. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: True verbose : int, optional Modifies verbosity of download, where higher numbers mean more updates. Default: 1 @@ -151,31 +147,21 @@ def fetch_hcp_standards(data_dir=None, resume=True, verbose=1): .. [3] http://brainvis.wustl.edu/workbench/standard_mesh_atlases.zip .. 
[4] https://web.archive.org/web/20220121035833/http://brainvis.wustl.edu/workbench/standard_mesh_atlases.zip """ - dataset_name = 'tpl-hcp_standards' + dataset_name = "tpl-hcp_standards" _get_reference_info(dataset_name, verbose=verbose) - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name)["standard_mesh_atlases"] - - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': f'{dataset_name}.tar.gz' - } - fetched = fetch_files( - data_dir, - files=[(f'{dataset_name}/standard_mesh_atlases', info['url'], opts)], - resume=resume, verbose=verbose + fetched = fetch_file( + dataset_name, + keys="standard_mesh_atlases", + force=force, + data_dir=data_dir, + verbose=verbose, ) - fetched = Path(fetched[0]) return fetched -def fetch_civet( - density='41k', version='v1', - data_dir=None, resume=True, verbose=1 - ): +def fetch_civet(density="41k", version="v1", force=False, data_dir=None, verbose=1): """ Fetch CIVET surface files. @@ -200,12 +186,12 @@ def fetch_civet( Other Parameters ---------------- + force : bool, optional + If True, will overwrite existing dataset. Default: False data_dir : str, optional Path to use as data directory. If not specified, will check for environmental variable 'NNT_DATA'; if that is not set, will use `~/nnt-data` instead. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: True verbose : int, optional Modifies verbosity of download, where higher numbers mean more updates. Default: 1 @@ -229,52 +215,48 @@ def fetch_civet( meeting of the organization for human brain mapping. Florence, Italy, pages 2266, 2006. """ - densities = ['41k', '164k'] + densities = ["41k", "164k"] if density not in densities: raise ValueError( f'The density of CIVET requested "{density}" does not exist. ' - f'Must be one of {densities}' + f"Must be one of {densities}" ) - versions = ['v1', 'v2'] + versions = ["v1", "v2"] if version not in versions: raise ValueError( f'The version of CIVET requested "{version}" does not exist. 
' - f'Must be one of {versions}' + f"Must be one of {versions}" ) - if version == 'v1' and density == '164k': - raise ValueError('The "164k" density CIVET surface only exists for ' - 'version "v2"') + if version == "v1" and density == "164k": + raise ValueError( + 'The "164k" density CIVET surface only exists for ' 'version "v2"' + ) - dataset_name = 'tpl-civet' + dataset_name = "tpl-civet" _get_reference_info(dataset_name, verbose=verbose) - keys = ['mid', 'white'] + keys = ["mid", "white"] - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name)[version][f'civet{density}'] + fetched = fetch_file( + dataset_name, + keys=[version, "civet" + density], + force=force, + data_dir=data_dir, + verbose=verbose, + ) - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': f'{dataset_name}.tar.gz' + data = { + k: SURFACE( + fetched / f"tpl-civet_space-ICBM152_hemi-L_den-{density}_{k}.obj", + fetched / f"tpl-civet_space-ICBM152_hemi-R_den-{density}_{k}.obj", + ) + for k in keys } + return Bunch(**data) - _filenames = [ - f"{dataset_name}/{version}/civet{density}/" - f"tpl-civet_space-ICBM152_hemi-{hemi}_den-{density}_{surf}.obj" - for surf in keys for hemi in ['L', 'R'] - ] - _files = [(f, info['url'], opts) for f in _filenames] - - data = fetch_files(data_dir, files=_files, resume=resume, verbose=verbose) - data = [SURFACE(*data[i:i + 2]) for i in range(0, len(keys) * 2, 2)] - - return Bunch(**dict(zip(keys, data))) - - -def fetch_conte69(data_dir=None, resume=True, verbose=1): +def fetch_conte69(force=False, data_dir=None, verbose=1): """ Download files for Van Essen et al., 2012 Conte69 template. @@ -291,12 +273,12 @@ def fetch_conte69(data_dir=None, resume=True, verbose=1): Other Parameters ---------------- + force : bool, optional + If True, will overwrite existing dataset. Default: False data_dir : str, optional Path to use as data directory. If not specified, will check for environmental variable 'NNT_DATA'; if that is not set, will use `~/nnt-data` instead. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: True verbose : int, optional Modifies verbosity of download, where higher numbers mean more updates. Default: 1 @@ -313,39 +295,26 @@ def fetch_conte69(data_dir=None, resume=True, verbose=1): 22(10):2241\u20132262, 2012. .. 
[3] http://brainvis.wustl.edu/wiki/index.php//Caret:Atlases/Conte69_Atlas """ - dataset_name = 'tpl-conte69' + dataset_name = "tpl-conte69" _get_reference_info(dataset_name, verbose=verbose) - keys = ['midthickness', 'inflated', 'vinflated'] - - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name) - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': f'{dataset_name}.tar.gz' - } - - _filenames = [ - f"{dataset_name}/tpl-conte69_space-MNI305_variant-fsLR32k_{res}.{hemi}.surf.gii" - for res in keys for hemi in ['L', 'R'] - ] + [ - f"{dataset_name}/template_description.json" - ] - _files = [(f, info['url'], opts) for f in _filenames] - - data = fetch_files(data_dir, files=_files, resume=resume, verbose=verbose) + keys = ["midthickness", "inflated", "vinflated"] - with open(data[-1], 'r') as src: - data[-1] = json.load(src) + fetched = fetch_file(dataset_name, force=force, data_dir=data_dir, verbose=verbose) - # bundle hemispheres together - data = [SURFACE(*data[:-1][i:i + 2]) for i in range(0, 6, 2)] + [data[-1]] + data = { + k: SURFACE( + fetched / f"tpl-conte69_space-MNI305_variant-fsLR32k_{k}.L.surf.gii", + fetched / f"tpl-conte69_space-MNI305_variant-fsLR32k_{k}.R.surf.gii", + ) + for k in keys + } + data["info"] = json.load(open(fetched / "template_description.json", "r")) - return Bunch(**dict(zip(keys + ['info'], data))) + return Bunch(**data) -def fetch_yerkes19(data_dir=None, resume=None, verbose=1): +def fetch_yerkes19(force=False, data_dir=None, verbose=1): """ Download files for Donahue et al., 2016 Yerkes19 template. @@ -362,12 +331,12 @@ def fetch_yerkes19(data_dir=None, resume=None, verbose=1): Other Parameters ---------------- + force : bool, optional + If True, will overwrite existing dataset. Default: False data_dir : str, optional Path to use as data directory. If not specified, will check for environmental variable 'NNT_DATA'; if that is not set, will use `~/nnt-data` instead. Default: None - resume : bool, optional - Whether to attempt to resume partial download, if possible. Default: True verbose : int, optional Modifies verbosity of download, where higher numbers mean more updates. Default: 1 @@ -382,28 +351,19 @@ def fetch_yerkes19(data_dir=None, resume=None, verbose=1): of Neuroscience, 36(25):6758\u20136770, 2016. .. 
[2] https://balsa.wustl.edu/reference/show/976nz """ - dataset_name = 'tpl-yerkes19' + dataset_name = "tpl-yerkes19" _get_reference_info(dataset_name, verbose=verbose) - keys = ['midthickness', 'inflated', 'vinflated'] - - data_dir = _get_data_dir(data_dir=data_dir) - info = _get_dataset_info(dataset_name) - opts = { - 'uncompress': True, - 'md5sum': info['md5'], - 'move': f'{dataset_name}.tar.gz' - } - _filenames = [ - f"{dataset_name}/tpl-yerkes19_space-fsLR32k_{res}.{hemi}.surf.gii" - for res in keys for hemi in ['L', 'R'] + keys = ["midthickness", "inflated", "vinflated"] - ] - _files = [(f, info['url'], opts) for f in _filenames] + fetched = fetch_file(dataset_name, force=force, data_dir=data_dir, verbose=verbose) - data = fetch_files(data_dir, files=_files, resume=resume, verbose=verbose) - - # bundle hemispheres together - data = [SURFACE(*data[i:i + 2]) for i in range(0, 6, 2)] + data = { + k: SURFACE( + fetched / f"tpl-yerkes19_space-fsLR32k_{k}.L.surf.gii", + fetched / f"tpl-yerkes19_space-fsLR32k_{k}.R.surf.gii", + ) + for k in keys + } - return Bunch(**dict(zip(keys + ['info'], data))) + return Bunch(**data) diff --git a/netneurotools/datasets/tests/test_datasets_utils.py b/netneurotools/datasets/tests/test_datasets_utils.py index 9d5d85c..7ecb0e8 100644 --- a/netneurotools/datasets/tests/test_datasets_utils.py +++ b/netneurotools/datasets/tests/test_datasets_utils.py @@ -10,7 +10,6 @@ ('atl-cammoun2012', ['fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k', 'MNI152NLin2009aSym', 'gcs']), ('tpl-conte69', ['url', 'md5']), - ('atl-pauli2018', ['probabilistic', 'deterministic', 'info']), ('tpl-fsaverage', ['fsaverage' + f for f in ['', '3', '4', '5', '6']]), ('atl-schaefer2018', ['fsaverage', 'fsaverage6', 'fsaverage6']) ]) diff --git a/netneurotools/datasets/tests/test_fetch.py b/netneurotools/datasets/tests/test_fetch.py index b55e58d..f607c97 100644 --- a/netneurotools/datasets/tests/test_fetch.py +++ b/netneurotools/datasets/tests/test_fetch.py @@ -76,11 +76,7 @@ def test_fetch_cammoun2012(self, tmpdir, version, expected): if isinstance(out, (tuple, list)): assert len(out) == e else: - assert isinstance(out, str) and out.endswith('.nii.gz') - - if 'fsaverage' in version: - with pytest.warns(DeprecationWarning): - datasets.fetch_cammoun2012('surface', data_dir=tmpdir, verbose=0) + assert isinstance(out, Path) and str(out).endswith('.nii.gz') @pytest.mark.parametrize('version', [ 'fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k' @@ -123,7 +119,7 @@ def test_fetch_voneconomo(self, tmpdir): """Test fetching of von Economo parcellations.""" vek = datasets.fetch_voneconomo(data_dir=tmpdir, verbose=0) assert all(hasattr(vek, k) and len(vek[k]) == 2 for k in ['gcs', 'ctab']) - assert isinstance(vek.get('info'), str) + assert isinstance(vek.get('info'), Path) class TestFetchProject: From ae086b40f410c7b7abfc2c38d6a4e020621083b3 Mon Sep 17 00:00:00 2001 From: Zhen-Qi Liu Date: Tue, 12 Nov 2024 17:06:34 -0500 Subject: [PATCH 22/32] Add new datasets and update project fetching functions --- docs/api.rst | 3 +- docs/conf.py | 3 +- docs/requirements.txt | 1 + netneurotools/datasets/__init__.py | 12 +- netneurotools/datasets/datasets.json | 71 ++++++ netneurotools/datasets/datasets_utils.py | 10 +- netneurotools/datasets/fetch_project.py | 270 ++++++++++++++++++--- netneurotools/datasets/references.json | 48 ++++ netneurotools/datasets/tests/test_fetch.py | 10 +- 9 files changed, 390 insertions(+), 38 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index 601c92d..a0baf52 100644 --- 
a/docs/api.rst +++ b/docs/api.rst @@ -57,7 +57,8 @@ To download project-related data fetch_hansen_receptors fetch_hansen_genecognition fetch_hansen_brainstem - fetch_shafiei_hcpmeg + fetch_shafiei_megfmrimapping + fetch_shafiei_megdynamics fetch_suarez_mami fetch_famous_gmat fetch_neurosynth diff --git a/docs/conf.py b/docs/conf.py index 6ac2598..a5890a1 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -39,7 +39,8 @@ 'sphinx.ext.mathjax', 'sphinx.ext.napoleon', 'sphinx.ext.viewcode', - 'sphinx_gallery.gen_gallery' + 'sphinx_gallery.gen_gallery', + 'sphinx_design' ] # Generate the API documentation when building diff --git a/docs/requirements.txt b/docs/requirements.txt index 0162e19..2a4b94f 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -4,3 +4,4 @@ sphinx_rtd_theme sphinx-gallery pillow pytest-doctestplus +sphinx_design \ No newline at end of file diff --git a/netneurotools/datasets/__init__.py b/netneurotools/datasets/__init__.py index 0cd400e..f6c9bde 100644 --- a/netneurotools/datasets/__init__.py +++ b/netneurotools/datasets/__init__.py @@ -21,8 +21,10 @@ # old projects fetch_vazquez_rodriguez2019, fetch_mirchi2018, # new projects - fetch_hansen_manynetworks, fetch_hansen_receptors, fetch_hansen_genecognition, - fetch_hansen_brainstem, fetch_shafiei_hcpmeg, fetch_suarez_mami, + fetch_hansen_manynetworks, fetch_hansen_receptors, + fetch_hansen_genecognition, fetch_hansen_brainstem, + fetch_shafiei_megfmrimapping, fetch_shafiei_megdynamics, + fetch_suarez_mami, # example data fetch_famous_gmat, # resources @@ -44,8 +46,10 @@ 'fetch_voneconomo', # fetch_project 'fetch_vazquez_rodriguez2019', 'fetch_mirchi2018', - 'fetch_hansen_manynetworks', 'fetch_hansen_receptors', 'fetch_hansen_genecognition', - 'fetch_hansen_brainstem', 'fetch_shafiei_hcpmeg', 'fetch_suarez_mami', + 'fetch_hansen_manynetworks', 'fetch_hansen_receptors', + 'fetch_hansen_genecognition', 'fetch_hansen_brainstem', + 'fetch_shafiei_megfmrimapping', 'fetch_shafiei_megdynamics', + 'fetch_suarez_mami', 'fetch_famous_gmat', 'fetch_neurosynth', # datasets_utils diff --git a/netneurotools/datasets/datasets.json b/netneurotools/datasets/datasets.json index d0183b8..03c6169 100644 --- a/netneurotools/datasets/datasets.json +++ b/netneurotools/datasets/datasets.json @@ -287,5 +287,76 @@ "uncompress": true, "rename-folder": "hansen_many_networks-1.0.0", "rel-path": "ds-hansen_manynetworks" + }, + "ds-hansen_receptors": { + "url-type": "github-release", + "url": [ + "netneurolab", + "hansen_receptors", + "v1.0.0" + ], + "md5": "", + "uncompress": true, + "rename-folder": "hansen_receptors-1.0.0", + "rel-path": "ds-hansen_receptors" + }, + "ds-hansen_genecognition": { + "url-type": "github-release", + "url": [ + "netneurolab", + "hansen_genecognition", + "v1.0.0" + ], + "md5": "", + "uncompress": true, + "rename-folder": "hansen_genecognition-1.0.0", + "rel-path": "ds-hansen_genecognition" + }, + "ds-hansen_brainstem": { + "url-type": "github-release", + "url": [ + "netneurolab", + "hansen_brainstem", + "v1.0.0" + ], + "md5": "", + "uncompress": true, + "rename-folder": "hansen_brainstem-1.0.0", + "rel-path": "ds-hansen_brainstem" + }, + "ds-shafiei_megfmrimapping": { + "url-type": "github-release", + "url": [ + "netneurolab", + "shafiei_megfmrimapping", + "v1.0.0" + ], + "md5": "", + "uncompress": true, + "rename-folder": "shafiei_megfmrimapping-1.0.0", + "rel-path": "ds-shafiei_megfmrimapping" + }, + "ds-shafiei_megdynamics": { + "url-type": "github-release", + "url": [ + "netneurolab", + "shafiei_megdynamics", + 
"v1.0.0" + ], + "md5": "", + "uncompress": true, + "rename-folder": "shafiei_megdynamics-1.0.0", + "rel-path": "ds-shafiei_megdynamics" + }, + "ds-suarez_mami": { + "url-type": "zenodo-file", + "url": [ + "7143143", + "data.zip" + ], + "md5": "bf52dfd87b20fc565dd2bca626a7f65a", + "uncompress": true, + "rename-folder": "data", + "rel-path": "ds-suarez_mami" } } diff --git a/netneurotools/datasets/datasets_utils.py b/netneurotools/datasets/datasets_utils.py index 5ab90cd..209edc6 100644 --- a/netneurotools/datasets/datasets_utils.py +++ b/netneurotools/datasets/datasets_utils.py @@ -63,11 +63,14 @@ def _decode_url(url_type, url): """ OSF_API = "https://files.osf.io/v1/resources/{}/providers/osfstorage/{}" GITHUB_RELEASE_API = "https://github.com/{}/{}/archive/refs/tags/{}.tar.gz" + ZENODO_FILE_API = "https://zenodo.org/record/{}/files/{}?download=1" if url_type == "osf": out_url = OSF_API.format(*url) elif url_type == "github-release": out_url = GITHUB_RELEASE_API.format(*url) + elif url_type == "zenodo-file": + out_url = ZENODO_FILE_API.format(*url) else: raise ValueError("URL type {} not recognized".format(url_type)) @@ -128,7 +131,12 @@ def fetch_file(dataset_name, keys=None, force=False, data_dir=None, verbose=1): ) # extract contents and remove compressed file - shutil.unpack_archive(dl_fname, targ_folder.parent, format="gztar") + if info["url-type"] == "zenodo-file": + archive_format = "zip" + else: + archive_format = "gztar" + + shutil.unpack_archive(dl_fname, targ_folder.parent, format=archive_format) os.remove(dl_fname) # rename folder if necessary diff --git a/netneurotools/datasets/fetch_project.py b/netneurotools/datasets/fetch_project.py index 4329728..08e04b2 100644 --- a/netneurotools/datasets/fetch_project.py +++ b/netneurotools/datasets/fetch_project.py @@ -144,46 +144,258 @@ def fetch_hansen_manynetworks(force=False, data_dir=None, verbose=1): fetched = fetch_file(dataset_name, force=force, data_dir=data_dir, verbose=verbose) - # load data - data = { - "cammoun033": { - "gene": fetched / "data/Cammoun033/gene_coexpression.npy", - "func": fetched / "data/Cammoun033/func_coactivation.npy", - }, - "schaefer100": { - "gene": fetched / "data/Schaefer100/gene_coexpression.npy", - }, - "schaefer400": { - "gene": fetched / "data/Schaefer400/gene_coexpression.npy", - }, - } + # _parc = { + # "cammoun033": "Cammoun033", + # "schaefer100": "Schaefer100", + # "schaefer400": "Schaefer400", + # } + + # data = { + # parc: { + # "bigbrain": fetched + # / f"data/{_parc[parc]}/bigbrain_intensities.csv", + # "cognitive": fetched + # / f"data/{_parc[parc]}/cognitive_similarity.npy", + # "sc": fetched / f"data/{_parc[parc]}/consensusSC.npy", + # "sc_wei": fetched / f"data/{_parc[parc]}/consensusSC_wei.npy", + # "ephys": fetched + # / f"data/{_parc[parc]}/electrophysiological_connectivity.npy", + # "gene": fetched / f"data/{_parc[parc]}/gene_coexpression.npy", + # "megfc": fetched + # / f"data/{_parc[parc]}/groupFCmeg_aec_orth_{_parc[parc]}.npy.npz", + # "haemodynamic": fetched + # / f"data/{_parc[parc]}/haemodynamic_connectivity.npy", + # "laminar": fetched / f"data/{_parc[parc]}/laminar_similarity.npy", + # "metabolic": fetched + # / f"data/{_parc[parc]}/metabolic_connectivity.npy", + # "receptor": fetched + # / f"data/{_parc[parc]}/receptor_similarity.npy", + # "temporal": fetched + # / f"data/{_parc[parc]}/temporal_similarity.npy", + # "voneconomo": fetched + # / f"data/{_parc[parc]}/voneconomo_{_parc[parc]}.csv", + # } + # for parc in ["cammoun033", "schaefer100", "schaefer400"] + # } + 
+ return fetched + + +def fetch_hansen_receptors(force=False, data_dir=None, verbose=1): + """ + Download files from Hansen et al., 2022, Nature Neuroscience. - return Bunch(**data) + This dataset contains + If you used this data, please cite [1]_. -def fetch_hansen_receptors(): - """Download files from Hansen et al., 2022, Nature Neuroscience.""" - pass + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Dictionary-like object with fetched data. + Other Parameters + ---------------- + force : bool, optional + If True, will overwrite existing dataset. Default: False + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 -def fetch_hansen_genecognition(): - """Download files from Hansen et al., 2021, Nature Human Behaviour.""" - pass + References + ---------- + .. [1] + """ + dataset_name = "ds-hansen_receptors" + _get_reference_info(dataset_name, verbose=verbose) + fetched = fetch_file(dataset_name, force=force, data_dir=data_dir, verbose=verbose) -def fetch_hansen_brainstem(): - """Download files from Hansen et al., 2024.""" - pass + return fetched -def fetch_shafiei_hcpmeg(): - """Download files from Shafiei et al., 2022 & Shafiei et al., 2023.""" - pass +def fetch_hansen_genecognition(force=False, data_dir=None, verbose=1): + """Download files from Hansen et al., 2021, Nature Human Behaviour. + This dataset contains -def fetch_suarez_mami(): - """Download files from Suarez et al., 2022, eLife.""" - pass + If you used this data, please cite [1]_. + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Dictionary-like object with fetched data. + + Other Parameters + ---------------- + force : bool, optional + If True, will overwrite existing dataset. Default: False + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + References + ---------- + .. [1] + """ + dataset_name = "ds-hansen_genecognition" + _get_reference_info(dataset_name, verbose=verbose) + + fetched = fetch_file(dataset_name, force=force, data_dir=data_dir, verbose=verbose) + + return fetched + + +def fetch_hansen_brainstem(force=False, data_dir=None, verbose=1): + """Download files from Hansen et al., 2024. + + This dataset contains + + If you used this data, please cite [1]_. + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Dictionary-like object with fetched data. + + Other Parameters + ---------------- + force : bool, optional + If True, will overwrite existing dataset. Default: False + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + References + ---------- + .. 
[1] + """ + dataset_name = "ds-hansen_brainstem" + _get_reference_info(dataset_name, verbose=verbose) + + fetched = fetch_file(dataset_name, force=force, data_dir=data_dir, verbose=verbose) + + return fetched + + +def fetch_shafiei_megfmrimapping(force=False, data_dir=None, verbose=1): + """Download files from Shafiei et al., 2022. + + This dataset contains + + If you used this data, please cite [1]_. + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Dictionary-like object with fetched data. + + Other Parameters + ---------------- + force : bool, optional + If True, will overwrite existing dataset. Default: False + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + References + ---------- + .. [1] + """ + dataset_name = "ds-shafiei_megfmrimapping" + _get_reference_info(dataset_name, verbose=verbose) + + fetched = fetch_file(dataset_name, force=force, data_dir=data_dir, verbose=verbose) + + return fetched + + +def fetch_shafiei_megdynamics(force=False, data_dir=None, verbose=1): + """Download files from Shafiei et al., 2023. + + This dataset contains + + If you used this data, please cite [1]_. + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Dictionary-like object with fetched data. + + Other Parameters + ---------------- + force : bool, optional + If True, will overwrite existing dataset. Default: False + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + References + ---------- + .. [1] + """ + dataset_name = "ds-shafiei_megdynamics" + _get_reference_info(dataset_name, verbose=verbose) + + fetched = fetch_file(dataset_name, force=force, data_dir=data_dir, verbose=verbose) + + return fetched + + +def fetch_suarez_mami(force=False, data_dir=None, verbose=1): + """Download files from Suarez et al., 2022, eLife. + + This dataset contains + + If you used this data, please cite [1]_. + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Dictionary-like object with fetched data. + + Other Parameters + ---------------- + force : bool, optional + If True, will overwrite existing dataset. Default: False + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + References + ---------- + .. 
[1] + """ + dataset_name = "ds-suarez_mami" + _get_reference_info(dataset_name, verbose=verbose) + + fetched = fetch_file(dataset_name, force=force, data_dir=data_dir, verbose=verbose) + + return fetched def fetch_famous_gmat(dataset, force=False, data_dir=None, verbose=1): diff --git a/netneurotools/datasets/references.json b/netneurotools/datasets/references.json index b5214a9..7fb9969 100644 --- a/netneurotools/datasets/references.json +++ b/netneurotools/datasets/references.json @@ -216,5 +216,53 @@ "bibkey": "" } ] + }, + "ds-hansen_receptors": { + "primary": [ + { + "citation": "", + "bibkey": "" + } + ] + }, + "ds-hansen_genecognition": { + "primary": [ + { + "citation": "", + "bibkey": "" + } + ] + }, + "ds-hansen_brainstem": { + "primary": [ + { + "citation": "", + "bibkey": "" + } + ] + }, + "ds-shafiei_megfmrimapping": { + "primary": [ + { + "citation": "", + "bibkey": "" + } + ] + }, + "ds-shafiei_megdynamics": { + "primary": [ + { + "citation": "", + "bibkey": "" + } + ] + }, + "ds-suarez_mami": { + "primary": [ + { + "citation": "", + "bibkey": "" + } + ] } } \ No newline at end of file diff --git a/netneurotools/datasets/tests/test_fetch.py b/netneurotools/datasets/tests/test_fetch.py index f607c97..6ef8c5e 100644 --- a/netneurotools/datasets/tests/test_fetch.py +++ b/netneurotools/datasets/tests/test_fetch.py @@ -163,9 +163,15 @@ def test_fetch_hansen_brainstem(self, tmpdir): assert False @pytest.mark.xfail - def test_fetch_shafiei_hcpmeg(self, tmpdir): + def test_fetch_shafiei_megfmrimapping(self, tmpdir): """Test fetching of Shafiei et al., 2022 & 2023 HCP-MEG dataset.""" - # shafiei = datasets.fetch_shafiei_hcpmeg(data_dir=tmpdir, verbose=0) + # shafiei = datasets.fetch_shafiei_megfmrimapping(data_dir=tmpdir, verbose=0) + assert False + + @pytest.mark.xfail + def test_fetch_shafiei_megdynamics(self, tmpdir): + """Test fetching of Shafiei et al., 2022 & 2023 HCP-MEG dataset.""" + # shafiei = datasets.fetch_shafiei_megdynamics(data_dir=tmpdir, verbose=0) assert False @pytest.mark.xfail From f03d97adca53432b3f8a117b21cce6a4ea56a562 Mon Sep 17 00:00:00 2001 From: Zhen-Qi Liu Date: Thu, 14 Nov 2024 21:34:45 -0500 Subject: [PATCH 23/32] Add curated datasets for fsaverage, fsLR, and CIVET --- docs/api.rst | 3 + netneurotools/datasets/__init__.py | 8 +- netneurotools/datasets/datasets.json | 132 ++++++++- netneurotools/datasets/datasets_utils.py | 2 +- netneurotools/datasets/fetch_template.py | 328 +++++++++++++++++++++++ netneurotools/datasets/netneurotools.bib | 11 + netneurotools/datasets/references.json | 80 +++++- 7 files changed, 537 insertions(+), 27 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index a0baf52..f6cfcca 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -27,8 +27,11 @@ To download templates fetch_fsaverage + fetch_fsaverage_curated fetch_hcp_standards + fetch_fslr_curated fetch_civet + fetch_civet_curated fetch_conte69 fetch_yerkes19 diff --git a/netneurotools/datasets/__init__.py b/netneurotools/datasets/__init__.py index f6c9bde..f7a0cf3 100644 --- a/netneurotools/datasets/__init__.py +++ b/netneurotools/datasets/__init__.py @@ -2,7 +2,9 @@ from .fetch_template import ( - fetch_fsaverage, fetch_hcp_standards, fetch_civet, + fetch_fsaverage, fetch_fsaverage_curated, + fetch_hcp_standards, fetch_fslr_curated, + fetch_civet, fetch_civet_curated, fetch_conte69, fetch_yerkes19 ) @@ -38,7 +40,9 @@ __all__ = [ # fetch_template - 'fetch_fsaverage', 'fetch_hcp_standards', 'fetch_civet', + 'fetch_fsaverage', 'fetch_fsaverage_curated', + 'fetch_hcp_standards', 
'fetch_fslr_curated', + 'fetch_civet', 'fetch_civet_curated', 'fetch_conte69', 'fetch_yerkes19', # fetch_atlas 'fetch_cammoun2012', 'fetch_schaefer2018', 'fetch_mmpall', diff --git a/netneurotools/datasets/datasets.json b/netneurotools/datasets/datasets.json index 03c6169..1beb965 100644 --- a/netneurotools/datasets/datasets.json +++ b/netneurotools/datasets/datasets.json @@ -144,6 +144,102 @@ "rel-path": "tpl-fsaverage/fsaverage6" } }, + "tpl-fsaverage_curated": { + "fsaverage": { + "url-type": "osf", + "url": [ + "4mw3a", + "60b684b2cb2a5e01e968c918" + ], + "md5": "f4969a33b8cd88e46821ef581e492282", + "uncompress": true, + "rel-path": "tpl-fsaverage_curated/fsaverage" + }, + "fsaverage6": { + "url-type": "osf", + "url": [ + "4mw3a", + "60b684aecb2a5e01fc68b7e1" + ], + "md5": "0cc48e9d5d5bb0216502888c954805fd", + "uncompress": true, + "rel-path": "tpl-fsaverage_curated/fsaverage6" + }, + "fsaverage5": { + "url-type": "osf", + "url": [ + "4mw3a", + "60b684ab9096b7021b63cf6b" + ], + "md5": "c61384c271ee2e6b5449222281137414", + "uncompress": true, + "rel-path": "tpl-fsaverage_curated/fsaverage5" + }, + "fsaverage4": { + "url-type": "osf", + "url": [ + "4mw3a", + "60b684ac9096b7021c63d9c5" + ], + "md5": "019dbf849671f5bed5a42476e5359997", + "uncompress": true, + "rel-path": "tpl-fsaverage_curated/fsaverage4" + } + }, + "tpl-hcp_standards": { + "standard_mesh_atlases": { + "url-type": "osf", + "url": [ + "udpv8", + "6732749850aeaa53a10ddf2c" + ], + "md5": "c13bf257f0a7dea7955a83577cfe6659", + "uncompress": true, + "rel-path": "tpl-hcp_standards/standard_mesh_atlases" + } + }, + "tpl-fslr_curated": { + "fslr164k": { + "url-type": "osf", + "url": [ + "4mw3a", + "60b684b9cb2a5e01fd68c115" + ], + "md5": "e86a2e9d998e07d621e8f31f0205bf3e", + "uncompress": true, + "rel-path": "tpl-fslr_curated/fslr164k" + }, + "fslr32k": { + "url-type": "osf", + "url": [ + "4mw3a", + "60b684b53a6df1021bd4df2d" + ], + "md5": "7932b4418f63d28935b5adf67150b16f", + "uncompress": true, + "rel-path": "tpl-fslr_curated/fslr32k" + }, + "fslr8k": { + "url-type": "osf", + "url": [ + "4mw3a", + "63eadef9b3fed60444e3434f" + ], + "md5": "a1226aee262475e23d2cc37b84ef261d", + "uncompress": true, + "rel-path": "tpl-fslr_curated/fslr8k" + }, + "fslr4k": { + "url-type": "osf", + "url": [ + "4mw3a", + "63eadf00cb544b03fa9e6f00" + ], + "md5": "72b7b17d389a04774ca6fd4ca28b4087", + "uncompress": true, + "rel-path": "tpl-fslr_curated/fslr4k" + } + }, "tpl-civet": { "v1": { "civet41k": { @@ -180,6 +276,30 @@ } } }, + "tpl-civet_curated": { + "v2": { + "civet41k": { + "url-type": "osf", + "url": [ + "4mw3a", + "60b684a69096b7021c63d9a0" + ], + "md5": "b425aa4dd5aa9c3c764b192d2a799123", + "uncompress": true, + "rel-path": "tpl-civet_curated/v2/civet41k" + }, + "civet164k": { + "url-type": "osf", + "url": [ + "4mw3a", + "60b684ab3a6df1020dd50706" + ], + "md5": "90d8e99304e1ec3b85d1092ff9ac5b8f", + "uncompress": true, + "rel-path": "tpl-civet_curated/v2/civet164k" + } + } + }, "ds-famous_gmat": { "url-type": "osf", "url": [ @@ -264,18 +384,6 @@ "uncompress": true, "rel-path": "atl-voneconomo_koskinas" }, - "tpl-hcp_standards": { - "standard_mesh_atlases": { - "url-type": "osf", - "url": [ - "udpv8", - "6732749850aeaa53a10ddf2c" - ], - "md5": "c13bf257f0a7dea7955a83577cfe6659", - "uncompress": true, - "rel-path": "tpl-hcp_standards/standard_mesh_atlases" - } - }, "ds-hansen_manynetworks": { "url-type": "github-release", "url": [ diff --git a/netneurotools/datasets/datasets_utils.py b/netneurotools/datasets/datasets_utils.py index 
209edc6..2c39ca8 100644 --- a/netneurotools/datasets/datasets_utils.py +++ b/netneurotools/datasets/datasets_utils.py @@ -15,7 +15,7 @@ from nilearn.datasets.utils import _fetch_file -SURFACE = namedtuple("Surface", ("lh", "rh")) +SURFACE = namedtuple("Surface", ("L", "R")) FREESURFER_IGNORE = [ "unknown", diff --git a/netneurotools/datasets/fetch_template.py b/netneurotools/datasets/fetch_template.py index 24bb7d4..9081efd 100644 --- a/netneurotools/datasets/fetch_template.py +++ b/netneurotools/datasets/fetch_template.py @@ -104,6 +104,106 @@ def fetch_fsaverage( return Bunch(**data) +def fetch_fsaverage_curated(version="fsaverage", force=False, data_dir=None, verbose=1): + """ + Download files for fsaverage FreeSurfer template. + + Curated by neuromaps. + + This dataset contains + + If you used this data, please cite 1_, 2_, 3_, 4_. + + Parameters + ---------- + version : str, optional + One of {'fsaverage', 'fsaverage4', 'fsaverage5', + 'fsaverage6'}. Default: 'fsaverage' + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Dictionary-like object with template files. + + Other Parameters + ---------------- + force : bool, optional + If True, will overwrite existing dataset. Default: False + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + References + ---------- + .. [1] Anders M Dale, Bruce Fischl, and Martin I Sereno. Cortical + surface-based analysis: i. segmentation and surface reconstruction. + Neuroimage, 9(2):179\u2013194, 1999. + .. [2] Bruce Fischl, Martin I Sereno, and Anders M Dale. Cortical + surface-based analysis: ii: inflation, flattening, and a surface-based + coordinate system. Neuroimage, 9(2):195\u2013207, 1999. + .. [3] Bruce Fischl, Martin I Sereno, Roger BH Tootell, and Anders M Dale. + High-resolution intersubject averaging and a coordinate system for the + cortical surface. Human brain mapping, 8(4):272\u2013284, 1999. + .. [4] Ross D Markello, Justine Y Hansen, Zhen-Qi Liu, Vincent Bazinet, + Golia Shafiei, Laura E Su\u00e1rez, Nadia Blostein, Jakob Seidlitz, + Sylvain Baillet, Theodore D Satterthwaite, and others. Neuromaps: + structural and functional interpretation of brain maps. Nature Methods, + 19(11):1472\u20131479, 2022. + """ + versions = ["fsaverage", "fsaverage6", "fsaverage5", "fsaverage4"] + if version not in versions: + raise ValueError( + f"The version of fsaverage requested {version} does not " + f"exist. 
Must be one of {versions}" + ) + + dataset_name = "tpl-fsaverage_curated" + _get_reference_info("tpl-fsaverage_curated", verbose=verbose) + + keys = ["white", "pial", "inflated", "sphere", "medial", "sulc", "vaavg"] + keys_suffix = { + "white": "white.surf", + "pial": "pial.surf", + "inflated": "inflated.surf", + "sphere": "sphere.surf", + "medial": "desc-nomedialwall_dparc.label", + "sulc": "desc-sulc_midthickness.shape", + "vaavg": "desc-vaavg_midthickness.shape", + } + version_density = { + "fsaverage": "164k", + "fsaverage6": "41k", + "fsaverage5": "10k", + "fsaverage4": "3k", + } + density = version_density[version] + + fetched = fetch_file( + dataset_name, keys=version, force=force, data_dir=data_dir, verbose=verbose + ) + + # deal with default neuromaps directory structure in the archive + if not fetched.exists(): + import shutil + + shutil.move(fetched.parent / "atlases/fsaverage", fetched) + shutil.rmtree(fetched.parent / "atlases") + + data = { + k: SURFACE( + fetched / f"tpl-fsaverage_den-{density}_hemi-L_{keys_suffix[k]}.gii", + fetched / f"tpl-fsaverage_den-{density}_hemi-R_{keys_suffix[k]}.gii", + ) + for k in keys + } + + return Bunch(**data) + + def fetch_hcp_standards(force=False, data_dir=None, verbose=1): """ Fetch HCP standard mesh atlases for converting between FreeSurfer and HCP. @@ -161,6 +261,116 @@ def fetch_hcp_standards(force=False, data_dir=None, verbose=1): return fetched +def fetch_fslr_curated(version="fslr32k", force=False, data_dir=None, verbose=1): + """ + Download files for HCP fsLR template. + + Curated by neuromaps. + + This dataset contains + + If you used this data, please cite 1_, 2_, 3_. + + Parameters + ---------- + version : str, optional + One of {"fslr4k", "fslr8k", "fslr32k", "fslr164k"}. Default: 'fslr32k' + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Dictionary-like object with template files. + + Other Parameters + ---------------- + force : bool, optional + If True, will overwrite existing dataset. Default: False + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + References + ---------- + .. [1] David C Van Essen, Kamil Ugurbil, Edward Auerbach, Deanna + Barch,Timothy EJ Behrens, Richard Bucholz, Acer Chang, Liyong Chen, + Maurizio Corbetta, Sandra W Curtiss, and others. The human connectome + project: a data acquisition perspective. Neuroimage, + 62(4):2222\u20132231, 2012. + .. [2] Matthew F Glasser, Stamatios N Sotiropoulos, J Anthony Wilson, + Timothy S Coalson, Bruce Fischl, Jesper L Andersson, Junqian Xu, Saad + Jbabdi, Matthew Webster, Jonathan R Polimeni, and others. The minimal + preprocessing pipelines for the human connectome project. Neuroimage, + 80:105\u2013124, 2013. + .. [3] Ross D Markello, Justine Y Hansen, Zhen-Qi Liu, Vincent Bazinet, + Golia Shafiei, Laura E Su\u00e1rez, Nadia Blostein, Jakob Seidlitz, + Sylvain Baillet, Theodore D Satterthwaite, and others. Neuromaps: + structural and functional interpretation of brain maps. Nature Methods, + 19(11):1472\u20131479, 2022. + """ + versions = ["fslr4k", "fslr8k", "fslr32k", "fslr164k"] + if version not in versions: + raise ValueError( + f"The version of fsaverage requested {version} does not " + f"exist. 
Must be one of {versions}" + ) + + dataset_name = "tpl-fslr_curated" + _get_reference_info("tpl-fslr_curated", verbose=verbose) + + keys = [ + "midthickness", + "inflated", + "veryinflated", + "sphere", + "medial", + "sulc", + "vaavg", + ] + if version == "fslr4k": + keys.remove("veryinflated") + keys_suffix = { + "midthickness": "midthickness.surf", + "inflated": "inflated.surf", + "veryinflated": "veryinflated.surf", + "sphere": "sphere.surf", + "medial": "desc-nomedialwall_dparc.label", + "sulc": "desc-sulc_midthickness.shape", + "vaavg": "desc-vaavg_midthickness.shape", + } + version_density = { + "fslr4k": "4k", + "fslr8k": "8k", + "fslr32k": "32k", + "fslr164k": "164k", + } + density = version_density[version] + + fetched = fetch_file( + dataset_name, keys=version, force=force, data_dir=data_dir, verbose=verbose + ) + + # deal with default neuromaps directory structure in the archive + if not fetched.exists(): + import shutil + + shutil.move(fetched.parent / "atlases/fsLR", fetched) + shutil.rmtree(fetched.parent / "atlases") + + data = { + k: SURFACE( + fetched / f"tpl-fsLR_den-{density}_hemi-L_{keys_suffix[k]}.gii", + fetched / f"tpl-fsLR_den-{density}_hemi-R_{keys_suffix[k]}.gii", + ) + for k in keys + } + + return Bunch(**data) + + def fetch_civet(density="41k", version="v1", force=False, data_dir=None, verbose=1): """ Fetch CIVET surface files. @@ -256,6 +466,124 @@ def fetch_civet(density="41k", version="v1", force=False, data_dir=None, verbose return Bunch(**data) +def fetch_civet_curated(version="civet41k", force=False, data_dir=None, verbose=1): + """ + Download files for CIVET template. + + Curated by neuromaps. + + This dataset contains + + If you used this data, please cite 1_, 2_, 3_, 4_. + + Parameters + ---------- + version : {'civet41k', 'civet164k'}, optional + Which density of the CIVET-space geometry files to fetch. + + Returns + ------- + filenames : :class:`sklearn.utils.Bunch` + Dictionary-like object with template files. + + Other Parameters + ---------------- + force : bool, optional + If True, will overwrite existing dataset. Default: False + data_dir : str, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 1 + + Notes + ----- + License: https://github.com/aces/CIVET_Full_Project/blob/master/LICENSE + + References + ---------- + .. [1] Oliver Lyttelton, Maxime Boucher, Steven Robbins, and Alan Evans. An + unbiased iterative group registration template for cortical surface + analysis. Neuroimage, 34(4):1535\u20131544, 2007. + .. [2] Vladimir S Fonov, Alan C Evans, Robert C McKinstry, C Robert Almli, + and DL Collins. Unbiased nonlinear average age-appropriate brain + templates from birth to adulthood. NeuroImage, 47:S102, 2009. + .. [3] Y Ad-Dab'bagh, O Lyttelton, J Muehlboeck, C Lepage, D Einarson, K + Mok, O Ivanov, R Vincent, J Lerch, and E Fombonne. The civet + image-processing environment: a fully automated comprehensive pipeline + for anatomical neuroimaging research. proceedings of the 12th annual + meeting of the organization for human brain mapping. Florence, Italy, + pages 2266, 2006. + .. [4] Ross D Markello, Justine Y Hansen, Zhen-Qi Liu, Vincent Bazinet, + Golia Shafiei, Laura E Su\u00e1rez, Nadia Blostein, Jakob Seidlitz, + Sylvain Baillet, Theodore D Satterthwaite, and others. 
Neuromaps: + structural and functional interpretation of brain maps. Nature Methods, + 19(11):1472\u20131479, 2022. + """ + versions = ["civet41k", "civet164k"] + if version not in versions: + raise ValueError( + f"The version of fsaverage requested {version} does not " + f"exist. Must be one of {versions}" + ) + + dataset_name = "tpl-civet_curated" + _get_reference_info("tpl-civet_curated", verbose=verbose) + + keys = [ + "white", + "midthickness", + "inflated", + "veryinflated", + "sphere", + "medial", + "sulc", + "vaavg", + ] + keys_suffix = { + "white": "white.surf", + "midthickness": "midthickness.surf", + "inflated": "inflated.surf", + "veryinflated": "veryinflated.surf", + "sphere": "sphere.surf", + "medial": "desc-nomedialwall_dparc.label", + "sulc": "desc-sulc_midthickness.shape", + "vaavg": "desc-vaavg_midthickness.shape", + } + version_density = { + "civet41k": "41k", + "civet164k": "164k", + } + density = version_density[version] + + fetched = fetch_file( + dataset_name, + keys=["v2", version], + force=force, + data_dir=data_dir, + verbose=verbose, + ) + + # deal with default neuromaps directory structure in the archive + if not fetched.exists(): + import shutil + + shutil.move(fetched.parent / "atlases/civet", fetched) + shutil.rmtree(fetched.parent / "atlases") + + data = { + k: SURFACE( + fetched / f"tpl-civet_den-{density}_hemi-L_{keys_suffix[k]}.gii", + fetched / f"tpl-civet_den-{density}_hemi-R_{keys_suffix[k]}.gii", + ) + for k in keys + } + + return Bunch(**data) + + def fetch_conte69(force=False, data_dir=None, verbose=1): """ Download files for Van Essen et al., 2012 Conte69 template. diff --git a/netneurotools/datasets/netneurotools.bib b/netneurotools/datasets/netneurotools.bib index 1e78958..2e725c4 100644 --- a/netneurotools/datasets/netneurotools.bib +++ b/netneurotools/datasets/netneurotools.bib @@ -251,3 +251,14 @@ @article{scholtens2018mri year={2018}, publisher={Elsevier} } + +@article{markello2022neuromaps, + title={Neuromaps: structural and functional interpretation of brain maps}, + author={Markello, Ross D and Hansen, Justine Y and Liu, Zhen-Qi and Bazinet, Vincent and Shafiei, Golia and Su{\'a}rez, Laura E and Blostein, Nadia and Seidlitz, Jakob and Baillet, Sylvain and Satterthwaite, Theodore D and others}, + journal={Nature Methods}, + volume={19}, + number={11}, + pages={1472--1479}, + year={2022}, + publisher={Nature Publishing Group US New York} +} diff --git a/netneurotools/datasets/references.json b/netneurotools/datasets/references.json index 7fb9969..d4ec5d8 100644 --- a/netneurotools/datasets/references.json +++ b/netneurotools/datasets/references.json @@ -51,6 +51,54 @@ } ] }, + "tpl-fsaverage_curated": { + "primary": [ + { + "citation": "Anders M Dale, Bruce Fischl, and Martin I Sereno. Cortical surface-based analysis: i. segmentation and surface reconstruction. Neuroimage, 9(2):179\u2013194, 1999.", + "bibkey": "dale1999cortical" + }, + { + "citation": "Bruce Fischl, Martin I Sereno, and Anders M Dale. Cortical surface-based analysis: ii: inflation, flattening, and a surface-based coordinate system. Neuroimage, 9(2):195\u2013207, 1999.", + "bibkey": "fischl1999cortical" + }, + { + "citation": "Bruce Fischl, Martin I Sereno, Roger BH Tootell, and Anders M Dale. High-resolution intersubject averaging and a coordinate system for the cortical surface. 
Human brain mapping, 8(4):272\u2013284, 1999.", + "bibkey": "fischl1999high" + }, + { + "citation": "Ross D Markello, Justine Y Hansen, Zhen-Qi Liu, Vincent Bazinet, Golia Shafiei, Laura E Su\u00e1rez, Nadia Blostein, Jakob Seidlitz, Sylvain Baillet, Theodore D Satterthwaite, and others. Neuromaps: structural and functional interpretation of brain maps. Nature Methods, 19(11):1472\u20131479, 2022.", + "bibkey": "markello2022neuromaps" + } + ] + }, + "tpl-hcp_standards": { + "primary": [ + { + "citation": "David C Van Essen, Kamil Ugurbil, Edward Auerbach, Deanna Barch, Timothy EJ Behrens, Richard Bucholz, Acer Chang, Liyong Chen, Maurizio Corbetta, Sandra W Curtiss, and others. The human connectome project: a data acquisition perspective. Neuroimage, 62(4):2222\u20132231, 2012.", + "bibkey": "van2012human" + }, + { + "citation": "Matthew F Glasser, Stamatios N Sotiropoulos, J Anthony Wilson, Timothy S Coalson, Bruce Fischl, Jesper L Andersson, Junqian Xu, Saad Jbabdi, Matthew Webster, Jonathan R Polimeni, and others. The minimal preprocessing pipelines for the human connectome project. Neuroimage, 80:105\u2013124, 2013.", + "bibkey": "glasser2013minimal" + } + ] + }, + "tpl-fslr_curated": { + "primary": [ + { + "citation": "David C Van Essen, Kamil Ugurbil, Edward Auerbach, Deanna Barch, Timothy EJ Behrens, Richard Bucholz, Acer Chang, Liyong Chen, Maurizio Corbetta, Sandra W Curtiss, and others. The human connectome project: a data acquisition perspective. Neuroimage, 62(4):2222\u20132231, 2012.", + "bibkey": "van2012human" + }, + { + "citation": "Matthew F Glasser, Stamatios N Sotiropoulos, J Anthony Wilson, Timothy S Coalson, Bruce Fischl, Jesper L Andersson, Junqian Xu, Saad Jbabdi, Matthew Webster, Jonathan R Polimeni, and others. The minimal preprocessing pipelines for the human connectome project. Neuroimage, 80:105\u2013124, 2013.", + "bibkey": "glasser2013minimal" + }, + { + "citation": "Ross D Markello, Justine Y Hansen, Zhen-Qi Liu, Vincent Bazinet, Golia Shafiei, Laura E Su\u00e1rez, Nadia Blostein, Jakob Seidlitz, Sylvain Baillet, Theodore D Satterthwaite, and others. Neuromaps: structural and functional interpretation of brain maps. Nature Methods, 19(11):1472\u20131479, 2022.", + "bibkey": "markello2022neuromaps" + } + ] + }, "tpl-civet": { "primary": [ { @@ -67,6 +115,26 @@ } ] }, + "tpl-civet_curated": { + "primary": [ + { + "citation": "Oliver Lyttelton, Maxime Boucher, Steven Robbins, and Alan Evans. An unbiased iterative group registration template for cortical surface analysis. Neuroimage, 34(4):1535\u20131544, 2007.", + "bibkey": "lyttelton2007unbiased" + }, + { + "citation": "Vladimir S Fonov, Alan C Evans, Robert C McKinstry, C Robert Almli, and DL Collins. Unbiased nonlinear average age-appropriate brain templates from birth to adulthood. NeuroImage, 47:S102, 2009.", + "bibkey": "fonov2009unbiased" + }, + { + "citation": "Y Ad-Dab'bagh, O Lyttelton, J Muehlboeck, C Lepage, D Einarson, K Mok, O Ivanov, R Vincent, J Lerch, and E Fombonne. The civet image-processing environment: a fully automated comprehensive pipeline for anatomical neuroimaging research. proceedings of the 12th annual meeting of the organization for human brain mapping. Florence, Italy, pages 2266, 2006.", + "bibkey": "ad2006civet" + }, + { + "citation": "Ross D Markello, Justine Y Hansen, Zhen-Qi Liu, Vincent Bazinet, Golia Shafiei, Laura E Su\u00e1rez, Nadia Blostein, Jakob Seidlitz, Sylvain Baillet, Theodore D Satterthwaite, and others. 
Neuromaps: structural and functional interpretation of brain maps. Nature Methods, 19(11):1472\u20131479, 2022.", + "bibkey": "markello2022neuromaps" + } + ] + }, "ds-famous_gmat": { "primary": [ { @@ -149,18 +217,6 @@ } ] }, - "tpl-hcp_standards": { - "primary": [ - { - "citation": "David C Van Essen, Kamil Ugurbil, Edward Auerbach, Deanna Barch, Timothy EJ Behrens, Richard Bucholz, Acer Chang, Liyong Chen, Maurizio Corbetta, Sandra W Curtiss, and others. The human connectome project: a data acquisition perspective. Neuroimage, 62(4):2222\u20132231, 2012.", - "bibkey": "van2012human" - }, - { - "citation": "Matthew F Glasser, Stamatios N Sotiropoulos, J Anthony Wilson, Timothy S Coalson, Bruce Fischl, Jesper L Andersson, Junqian Xu, Saad Jbabdi, Matthew Webster, Jonathan R Polimeni, and others. The minimal preprocessing pipelines for the human connectome project. Neuroimage, 80:105\u2013124, 2013.", - "bibkey": "glasser2013minimal" - } - ] - }, "ds-hansen_manynetworks": { "primary": [ { From 63a22c5fde9062131cb952ddc696a01229268840 Mon Sep 17 00:00:00 2001 From: Zhen-Qi Liu Date: Thu, 14 Nov 2024 21:35:31 -0500 Subject: [PATCH 24/32] Add pyvista plotter --- netneurotools/plotting/mpl_plotters.py | 6 + netneurotools/plotting/pyvista_plotters.py | 415 ++++++++++++++++++++- 2 files changed, 418 insertions(+), 3 deletions(-) diff --git a/netneurotools/plotting/mpl_plotters.py b/netneurotools/plotting/mpl_plotters.py index 66dcd9b..e5637dd 100644 --- a/netneurotools/plotting/mpl_plotters.py +++ b/netneurotools/plotting/mpl_plotters.py @@ -285,3 +285,9 @@ def plot_point_brain(data, coords, views=None, views_orientation='vertical', cbar.outline.set_linewidth(0) return fig + + +def plot_simple_brain(): + """Plot a simple brain using matplotlib.""" + # https://github.com/dutchconnectomelab/Simple-Brain-Plot + pass diff --git a/netneurotools/plotting/pyvista_plotters.py b/netneurotools/plotting/pyvista_plotters.py index 7b17565..18fd4b2 100644 --- a/netneurotools/plotting/pyvista_plotters.py +++ b/netneurotools/plotting/pyvista_plotters.py @@ -1,6 +1,415 @@ """Functions for pyvista-based plotting.""" +from pathlib import Path +import numpy as np +import nibabel as nib +import pyvista as pv +from netneurotools.datasets import ( + fetch_fslr_curated, + fetch_fsaverage_curated, + fetch_civet_curated, +) -def pv_plot_surface(): - """Plot a surface using PyVista.""" - pass + +def _pv_fetch_template(template, surf="inflated", data_dir=None, verbose=0): + if template in ["fsaverage", "fsaverage6", "fsaverage5", "fsaverage4"]: + _fetch_curr_tpl = fetch_fsaverage_curated + elif template in ["fslr4k", "fslr8k", "fslr32k", "fslr164k"]: + _fetch_curr_tpl = fetch_fslr_curated + elif template in ["civet41k", "civet164k"]: + _fetch_curr_tpl = fetch_civet_curated + else: + raise ValueError(f"Unknown template: {template}") + + curr_tpl_surf = _fetch_curr_tpl( + version=template, data_dir=data_dir, verbose=verbose + )[surf] + + return curr_tpl_surf + + +def _pv_make_surface(template, surf="inflated", hemi=None, data_dir=None, verbose=0): + curr_tpl_surf = _pv_fetch_template( + template=template, surf=surf, data_dir=data_dir, verbose=verbose + ) + + def _gifti_to_polydata(gifti_file): + vertices, faces = nib.load(gifti_file).agg_data() + return pv.PolyData( + vertices, np.c_[np.ones((faces.shape[0],), dtype=int) * 3, faces] + ) + + if hemi == "L": + return _gifti_to_polydata(curr_tpl_surf.L) + elif hemi == "R": + return _gifti_to_polydata(curr_tpl_surf.R) + else: + return ( + _gifti_to_polydata(curr_tpl_surf.L), + 
_gifti_to_polydata(curr_tpl_surf.R), + ) + + +def _mask_medial_wall(data, template, hemi=None, data_dir=None, verbose=0): + curr_medial = _pv_fetch_template( + template=template, surf="medial", data_dir=data_dir, verbose=verbose + ) + if isinstance(data, tuple): + curr_medial_data = ( + nib.load(curr_medial.L).agg_data(), + nib.load(curr_medial.R).agg_data(), + ) + ret_L = data[0].copy() + ret_R = data[1].copy() + ret_L[np.where(1 - curr_medial_data[0])] = np.nan + ret_R[np.where(1 - curr_medial_data[1])] = np.nan + ret = (ret_L, ret_R) + else: + if hemi == "L": + curr_medial_data = nib.load(curr_medial.L).agg_data() + elif hemi == "R": + curr_medial_data = nib.load(curr_medial.R).agg_data() + else: + curr_medial_data = np.concatenate( + [ + nib.load(curr_medial.L).agg_data(), + nib.load(curr_medial.R).agg_data(), + ], + axis=1, + ) + ret = data.copy() + ret[np.where(1 - curr_medial_data)] = np.nan + return ret + + +def pv_plot_surface( + vertex_data, + template, + surf="inflated", + hemi="both", + layout="default", + mask_medial=True, + cmap="viridis", + clim=None, + zoom_ratio=1.0, + show_colorbar=True, + cbar_title=None, + show_plot=True, + jupyter_backend="html", + lighting_style="default", + save_fig=None, + plotter_kws=None, + mesh_kws=None, + cbar_kws=None, + silhouette_kws=None, + data_dir=None, + verbose=0, +): + """ + Plot surface data using PyVista. + + Parameters + ---------- + vertex_data : array-like or tuple of array-like + Data array(s) to be plotted on the surface. If `hemi` is "both", this + should be a tuple of two arrays. Otherwise, a single array. + template : str + Template to use for plotting. Options include 'fsaverage', 'fsaverage6', + 'fsaverage5', 'fsaverage4', 'fslr4k', 'fslr8k', 'fslr32k', 'fslr164k', + 'civet41k', 'civet164k'. + surf : str, optional + Surface to plot. Default is 'inflated'. + hemi : str, optional + Hemisphere to plot. Options include 'L', 'R', 'both'. Default is 'both'. + layout : str, optional + Layout of the plot. Options include 'default', 'single', 'row', 'column'. + Default is 'default'. + mask_medial : bool, optional + Mask medial wall. Default is True. + cmap : str, optional + Colormap to use. Default is 'viridis'. + clim : tuple, optional + Colorbar limits. If None, will be set to 2.5th and 97.5th percentiles. + Default is None. + zoom_ratio : float, optional + Zoom ratio for the camera. Default is 1.0. + show_colorbar : bool, optional + Whether to show the colorbar. Default is True. + cbar_title : str, optional + Title for the colorbar. Default is None. + show_plot : bool, optional + Whether to show the plot. Default is True. + jupyter_backend : str, optional + Jupyter backend to use. See `PyVista documentation + `_ + for more details. Default is 'html'. + lighting_style : str, optional + Lighting style to use. Options include 'default', 'lightkit', 'threelights', + 'silhouette', 'metallic', 'plastic', 'shiny', 'glossy', 'ambient', 'plain'. + Default is 'default'. + save_fig : str or Path, optional + Path (include file name) to save the figure. Default is None. + + Returns + ------- + pl : PyVista.Plotter + PyVista plotter object. + + Other Parameters + ---------------- + plotter_kws : dict, optional + Additional keyword arguments to pass to the `PyVista plotter + `_. + Default is None. + mesh_kws : dict, optional + Additional keyword arguments to pass to the `PyVista mesh + `_. + Default is None. + cbar_kws : dict, optional + Additional keyword arguments to pass to the `PyVista colorbar + `_. + Default is None. 
+ silhouette_kws : dict, optional + Additional keyword arguments to pass to the `PyVista silhouette + `_. + Default is None. + data_dir : str or Path, optional + Path to use as data directory. If not specified, will check for + environmental variable 'NNT_DATA'; if that is not set, will use + `~/nnt-data` instead. Default: None + verbose : int, optional + Modifies verbosity of download, where higher numbers mean more updates. + Default: 0 + """ + # setup data + # could be a single array or a tuple of two arrays + if hemi == "both": # both hemispheres + surf_pair = _pv_make_surface( + template=template, surf=surf, data_dir=data_dir, verbose=verbose + ) + if len(vertex_data) == 2: # tuple or list of two arrays + # check if data length matches number of vertices + if not all(len(vertex_data[i]) == surf_pair[i].n_points for i in range(2)): + raise ValueError("Data length mismatch") + else: # combined array + # check if data length matches number of vertices + if len(vertex_data) != surf_pair[0].n_points + surf_pair[1].n_points: + raise ValueError("Data length mismatch") + # convert long array to tuple + vertex_data = ( + vertex_data[: surf_pair[0].n_points], + vertex_data[surf_pair[0].n_points :], + ) + + if mask_medial: + vertex_data = _mask_medial_wall( + vertex_data, template, hemi=None, data_dir=data_dir, verbose=verbose + ) + surf_pair[0].point_data["vertex_data"] = vertex_data[0] + surf_pair[1].point_data["vertex_data"] = vertex_data[1] + elif hemi in ["L", "R"]: + # single hemisphere + surf = _pv_make_surface( + template=template, surf=surf, hemi=hemi, data_dir=data_dir, verbose=verbose + ) + if len(vertex_data) != surf.n_points: + raise ValueError("Data length mismatch") + + if mask_medial: + vertex_data = _mask_medial_wall( + vertex_data, template, hemi=hemi, data_dir=data_dir, verbose=verbose + ) + surf.point_data["vertex_data"] = vertex_data + else: + raise ValueError(f"Unknown hemi: {hemi}") + + # setup plotter shape based on layout + if layout == "default": + if hemi == "both": + plotter_shape = (2, 2) + else: + plotter_shape = (1, 2) + elif layout == "single": + plotter_shape = (1, 1) + elif layout == "row": + if hemi == "both": + plotter_shape = (1, 4) + else: + plotter_shape = (2, 1) + elif layout == "column": + if hemi == "both": + plotter_shape = (4, 1) + else: + plotter_shape = (1, 2) + else: + raise ValueError(f"Unknown layout: {layout}") + + # setup color limits + if clim is not None: + _vmin, _vmax = clim + else: + if len(vertex_data) == 2: + _values = np.c_[vertex_data[0], vertex_data[1]] + else: + _values = vertex_data + _vmin, _vmax = np.nanpercentile(_values, [2.5, 97.5]) + + # default plotter settings + plotter_settings = dict( + window_size=(350 * plotter_shape[1], 250 * plotter_shape[0]), + border=False, + lighting="three lights", + ) + # notebook plotting + if jupyter_backend is not None: + plotter_settings.update(dict(notebook=True, off_screen=True)) + + # default mesh settings + mesh_settings = dict( + scalars="vertex_data", + smooth_shading=True, + cmap=cmap, + clim=(_vmin, _vmax), + show_scalar_bar=False, + ) + + # lighting styles + lighting_style_keys = ["ambient", "diffuse", "specular", "specular_power"] + lighting_style_presets = { + "metallic": [0.1, 0.3, 1.0, 10], + "plastic": [0.3, 0.4, 0.3, 5], + "shiny": [0.2, 0.6, 0.8, 50], + "glossy": [0.1, 0.7, 0.9, 90], + "ambient": [0.8, 0.1, 0.0, 1], + "plain": [0.1, 1.0, 0.05, 5], + } + + if lighting_style in ["default", "lightkit"]: + mesh_settings["lighting"] = "light kit" + elif lighting_style == 
"threelights": + mesh_settings["lighting"] = "three lights" + elif lighting_style == "silhouette": + mesh_settings["lighting"] = "light kit" + elif lighting_style in lighting_style_presets.keys(): + mesh_settings.update( + { + k: v + for k, v in zip( + lighting_style_keys, lighting_style_presets[lighting_style] + ) + } + ) + mesh_settings["lighting"] = "light kit" + else: + raise ValueError(f"Unknown lighting style: {lighting_style}") + + # default colorbar settings + cbar_settings = dict( + title=cbar_title, + n_labels=2, + label_font_size=10, + title_font_size=12, + font_family="arial", + height=0.15, + ) + + # default silhouette settings + silhouette_settings = dict( + color="white", + feature_angle=40 + ) + + # update if provided with custom settings + if plotter_kws is not None: + plotter_settings.update(plotter_kws) + if mesh_kws is not None: + mesh_settings.update(mesh_kws) + if cbar_kws is not None: + cbar_settings.update(cbar_kws) + if silhouette_kws is not None: + silhouette_settings.update(silhouette_kws) + + pl = pv.Plotter(shape=plotter_shape, **plotter_settings) + + if layout == "single": # single panel (1, 1) + if hemi == "both": + _surf = surf_pair[0].rotate_z(180) + pl.subplot(0, 0) + pl.add_mesh(_surf, **mesh_settings) + pl.camera_position = "yz" + pl.zoom_camera(zoom_ratio) + if lighting_style == "silhouette": + pl.add_silhouette(_surf, **silhouette_settings) + else: # multiple panels + if hemi == "both": # both hemi, 4 panels + if layout == "default": + _pos = [(0, 0), (0, 1), (1, 0), (1, 1)] + elif layout == "row": + _pos = [(0, 0), (0, 2), (0, 1), (0, 3)] + elif layout == "column": + _pos = [(0, 0), (2, 0), (1, 0), (3, 0)] + else: + raise ValueError(f"Unknown layout: {layout}") + _surf_list = [ + surf_pair[0].rotate_z(180), + surf_pair[1], + surf_pair[0], + surf_pair[1].rotate_z(180), + ] + for _xy, _surf in zip(_pos, _surf_list): + pl.subplot(*_xy) + pl.add_mesh(_surf, **mesh_settings) + pl.camera_position = "yz" + pl.zoom_camera(zoom_ratio) + if lighting_style == "silhouette": + pl.add_silhouette(_surf, **silhouette_settings) + else: # single hemi, 2 panels + if layout == "default": + _pos = [(0, 0), (0, 1)] + elif layout == "row": + _pos = [(0, 0), (0, 1)] + elif layout == "column": + _pos = [(0, 0), (1, 0)] + else: + raise ValueError(f"Unknown layout: {layout}") + + if hemi == "L": + _surf_list = [surf.rotate_z(180), surf] + else: + _surf_list = [surf, surf.rotate_z(180)] + + for _xy, _surf in zip(_pos, _surf_list): + pl.subplot(*_xy) + pl.add_mesh(_surf, **mesh_settings) + pl.camera_position = "yz" + pl.zoom_camera(zoom_ratio) + if lighting_style == "silhouette": + pl.add_silhouette(_surf, **silhouette_settings) + + if show_colorbar: + cbar = pl.add_scalar_bar(**cbar_settings) + cbar.GetLabelTextProperty().SetItalic(True) + + # setting the headlight (by default applied to all scenes) + if lighting_style in ["default", "silhouette"] + list( + lighting_style_presets.keys() + ): + light = pv.Light(light_type="headlight", intensity=0.2) + pl.add_light(light) + + if show_plot: + if jupyter_backend is not None: + pl.show(jupyter_backend=jupyter_backend) + else: + pl.show() + + if save_fig is not None: + _fname = Path(save_fig) + if _fname.suffix in [".png", ".jpeg", ".jpg", ".bmp", ".tif", ".tiff"]: + pl.screenshot(_fname, return_img=False) + elif _fname.suffix in [".svg", ".eps", ".ps", ".pdf", ".tex"]: + pl.save_graphic(_fname) + else: + raise ValueError(f"Unknown file format: {save_fig}") + + return pl From 82b592649e93784e2ab748b64ee64d7f4e9bc530 Mon Sep 17 
00:00:00 2001 From: Zhen-Qi Liu Date: Thu, 14 Nov 2024 21:38:45 -0500 Subject: [PATCH 25/32] Improve pyvista import handling --- netneurotools/plotting/pyvista_plotters.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/netneurotools/plotting/pyvista_plotters.py b/netneurotools/plotting/pyvista_plotters.py index 18fd4b2..41de804 100644 --- a/netneurotools/plotting/pyvista_plotters.py +++ b/netneurotools/plotting/pyvista_plotters.py @@ -1,13 +1,21 @@ """Functions for pyvista-based plotting.""" from pathlib import Path -import numpy as np + import nibabel as nib -import pyvista as pv +import numpy as np + +try: + import pyvista as pv +except ImportError: + raise ImportError( + "PyVista is required for this function. Please install it first." + ) from None + from netneurotools.datasets import ( - fetch_fslr_curated, - fetch_fsaverage_curated, fetch_civet_curated, + fetch_fsaverage_curated, + fetch_fslr_curated, ) @@ -314,10 +322,7 @@ def pv_plot_surface( ) # default silhouette settings - silhouette_settings = dict( - color="white", - feature_angle=40 - ) + silhouette_settings = dict(color="white", feature_angle=40) # update if provided with custom settings if plotter_kws is not None: From b1ade675f49efe8943161f1f707d1f7681300201 Mon Sep 17 00:00:00 2001 From: Zhen-Qi Liu Date: Thu, 14 Nov 2024 21:42:24 -0500 Subject: [PATCH 26/32] Improve PyVista import handling and error reporting --- netneurotools/plotting/pyvista_plotters.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/netneurotools/plotting/pyvista_plotters.py b/netneurotools/plotting/pyvista_plotters.py index 41de804..e711a5d 100644 --- a/netneurotools/plotting/pyvista_plotters.py +++ b/netneurotools/plotting/pyvista_plotters.py @@ -8,9 +8,9 @@ try: import pyvista as pv except ImportError: - raise ImportError( - "PyVista is required for this function. Please install it first." - ) from None + _has_pyvista = False +else: + _has_pyvista = True from netneurotools.datasets import ( fetch_civet_curated, @@ -189,6 +189,9 @@ def pv_plot_surface( Modifies verbosity of download, where higher numbers mean more updates. 
Default: 0 """ + if not _has_pyvista: + raise ImportError("PyVista is required for this function") + # setup data # could be a single array or a tuple of two arrays if hemi == "both": # both hemispheres From abc386eb3a3e2e0230f0ea5dcb66f97777c4ad78 Mon Sep 17 00:00:00 2001 From: Zhen-Qi Liu Date: Thu, 14 Nov 2024 21:46:32 -0500 Subject: [PATCH 27/32] Mark test as expected failures --- netneurotools/datasets/tests/test_fetch.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/netneurotools/datasets/tests/test_fetch.py b/netneurotools/datasets/tests/test_fetch.py index 6ef8c5e..b118293 100644 --- a/netneurotools/datasets/tests/test_fetch.py +++ b/netneurotools/datasets/tests/test_fetch.py @@ -137,12 +137,14 @@ def test_fetch_mirchi2018(self, tmpdir): """Test fetching of Mirchi2018 dataset.""" assert False + @pytest.mark.xfail def test_fetch_hansen_manynetworks(self, tmpdir): """Test fetching of Hansen et al., 2023 many-networks dataset.""" - hansen = datasets.fetch_hansen_manynetworks(data_dir=tmpdir, verbose=0) - assert "cammoun033" in hansen - assert "gene" in hansen["cammoun033"] - assert isinstance(hansen["cammoun033"]["gene"], Path) + # hansen = datasets.fetch_hansen_manynetworks(data_dir=tmpdir, verbose=0) + # assert "cammoun033" in hansen + # assert "gene" in hansen["cammoun033"] + # assert isinstance(hansen["cammoun033"]["gene"], Path) + assert False @pytest.mark.xfail def test_fetch_hansen_receptors(self, tmpdir): From 388f7fc683367db8a0e4573b40cfabbb65f0b0e7 Mon Sep 17 00:00:00 2001 From: Zhen-Qi Liu Date: Fri, 15 Nov 2024 20:26:23 -0500 Subject: [PATCH 28/32] Refactor test_fetch.py --- netneurotools/datasets/tests/test_fetch.py | 226 +++++++++++++-------- 1 file changed, 142 insertions(+), 84 deletions(-) diff --git a/netneurotools/datasets/tests/test_fetch.py b/netneurotools/datasets/tests/test_fetch.py index b118293..0447a0c 100644 --- a/netneurotools/datasets/tests/test_fetch.py +++ b/netneurotools/datasets/tests/test_fetch.py @@ -1,4 +1,5 @@ """For testing netneurotools.datasets.fetch_* functionality.""" + import os import pytest from pathlib import Path @@ -9,63 +10,119 @@ class TestFetchTemplate: """Test fetching of template datasets.""" - @pytest.mark.parametrize('version', [ - 'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', 'fsaverage6' - ]) + @pytest.mark.parametrize( + "version", ["fsaverage", "fsaverage3", "fsaverage4", "fsaverage5", "fsaverage6"] + ) def test_fetch_fsaverage(self, tmpdir, version): """Test fetching of fsaverage surfaces.""" fsaverage = datasets.fetch_fsaverage( version=version, data_dir=tmpdir, verbose=0 ) - for k in ['orig', 'white', 'smoothwm', 'pial', 'inflated', 'sphere']: - assert hasattr(fsaverage, k) - assert len(fsaverage[k]) == 2 - assert all(os.path.isfile(hemi) for hemi in fsaverage[k]), fsaverage[k] + for k in ["orig", "white", "smoothwm", "pial", "inflated", "sphere"]: + assert k in fsaverage + assert fsaverage[k].L.exists() + assert fsaverage[k].R.exists() + + @pytest.mark.parametrize( + "version", ["fsaverage", "fsaverage4", "fsaverage5", "fsaverage6"] + ) + def test_fetch_fsaverage_curated(self, tmpdir, version): + """Test fetching of curated fsaverage surfaces.""" + fsaverage = datasets.fetch_fsaverage_curated( + version=version, data_dir=tmpdir, verbose=0 + ) + for k in ["white", "pial", "inflated", "sphere", "medial", "sulc", "vaavg"]: + assert k in fsaverage + assert fsaverage[k].L.exists() + assert fsaverage[k].R.exists() def test_fetch_hcp_standards(self, tmpdir): """Test fetching of HCP standard 
meshes.""" hcp = datasets.fetch_hcp_standards(data_dir=tmpdir, verbose=0) assert hcp.exists() - @pytest.mark.parametrize('version', [ - 'v1', 'v2' - ]) + @pytest.mark.parametrize("version", ["fslr4k", "fslr8k", "fslr32k", "fslr164k"]) + def test_fetch_fslr_curated(self, tmpdir, version): + """Test fetching of curated fsLR surfaces.""" + fslr = datasets.fetch_fslr_curated( + version=version, data_dir=tmpdir, verbose=0 + ) + for k in [ + "midthickness", + "inflated", + "veryinflated", + "sphere", + "medial", + "sulc", + "vaavg", + ]: + if version in ["fslr4k", "fslr8k"] and k == "veryinflated": + continue + assert k in fslr + assert fslr[k].L.exists() + assert fslr[k].R.exists() + + @pytest.mark.parametrize("version", ["v1", "v2"]) def test_fetch_civet(self, tmpdir, version): """Test fetching of CIVET templates.""" civet = datasets.fetch_civet(version=version, data_dir=tmpdir, verbose=0) - for key in ('mid', 'white'): + for key in ("mid", "white"): assert key in civet - for hemi in ('lh', 'rh'): - assert hasattr(civet[key], hemi) - assert os.path.isfile(getattr(civet[key], hemi)) + assert civet[key].L.exists() + assert civet[key].R.exists() + + @pytest.mark.parametrize("version", ["civet41k", "civet164k"]) + def test_fetch_civet_curated(self, tmpdir, version): + """Test fetching of curated CIVET templates.""" + civet = datasets.fetch_civet_curated( + version=version, data_dir=tmpdir, verbose=0 + ) + for k in [ + "white", + "midthickness", + "inflated", + "veryinflated", + "sphere", + "medial", + "sulc", + "vaavg", + ]: + assert k in civet + assert civet[k].L.exists() + assert civet[k].R.exists() def test_fetch_conte69(self, tmpdir): """Test fetching of Conte69 surfaces.""" conte = datasets.fetch_conte69(data_dir=tmpdir, verbose=0) - assert all(hasattr(conte, k) for k in - ['midthickness', 'inflated', 'vinflated', 'info']) + assert all( + hasattr(conte, k) for k in ["midthickness", "inflated", "vinflated", "info"] + ) def test_fetch_yerkes19(self, tmpdir): """Test fetching of Yerkes19 surfaces.""" yerkes19 = datasets.fetch_yerkes19(data_dir=tmpdir, verbose=0) - assert all(hasattr(yerkes19, k) for k in - ['midthickness', 'inflated', 'vinflated']) + assert all( + hasattr(yerkes19, k) for k in ["midthickness", "inflated", "vinflated"] + ) class TestFetchAtlas: """Test fetching of atlas datasets.""" - @pytest.mark.parametrize('version, expected', [ - ('MNI152NLin2009aSym', [1, 1, 1, 1, 1]), - ('fsaverage', [2, 2, 2, 2, 2]), - ('fsaverage5', [2, 2, 2, 2, 2]), - ('fsaverage6', [2, 2, 2, 2, 2]), - ('fslr32k', [2, 2, 2, 2, 2]), - ('gcs', [2, 2, 2, 2, 6]) - ]) + @pytest.mark.parametrize( + "version, expected", + [ + ("MNI152NLin2009aSym", [1, 1, 1, 1, 1]), + ("fsaverage", [2, 2, 2, 2, 2]), + ("fsaverage5", [2, 2, 2, 2, 2]), + ("fsaverage6", [2, 2, 2, 2, 2]), + ("fslr32k", [2, 2, 2, 2, 2]), + ("gcs", [2, 2, 2, 2, 6]), + ], + ) def test_fetch_cammoun2012(self, tmpdir, version, expected): """Test fetching of Cammoun2012 parcellations.""" - keys = ['scale033', 'scale060', 'scale125', 'scale250', 'scale500'] + keys = ["scale033", "scale060", "scale125", "scale250", "scale500"] cammoun = datasets.fetch_cammoun2012(version, data_dir=tmpdir, verbose=0) # output has expected keys @@ -76,20 +133,19 @@ def test_fetch_cammoun2012(self, tmpdir, version, expected): if isinstance(out, (tuple, list)): assert len(out) == e else: - assert isinstance(out, Path) and str(out).endswith('.nii.gz') + assert isinstance(out, Path) and str(out).endswith(".nii.gz") - @pytest.mark.parametrize('version', [ - 'fsaverage', 
'fsaverage5', 'fsaverage6', 'fslr32k' - ]) + @pytest.mark.parametrize( + "version", ["fsaverage", "fsaverage5", "fsaverage6", "fslr32k"] + ) def test_fetch_schaefer2018(self, tmpdir, version): """Test fetching of Schaefer2018 parcellations.""" keys = [ - f'{p}Parcels{n}Networks' - for p in range(100, 1001, 100) for n in [7, 17] + f"{p}Parcels{n}Networks" for p in range(100, 1001, 100) for n in [7, 17] ] schaefer = datasets.fetch_schaefer2018(version, data_dir=tmpdir, verbose=0) - if version == 'fslr32k': + if version == "fslr32k": assert all(k in schaefer and os.path.isfile(schaefer[k]) for k in keys) else: for k in keys: @@ -102,13 +158,15 @@ def test_fetch_mmpall(self, tmpdir): mmp = datasets.fetch_mmpall(data_dir=tmpdir, verbose=0) assert len(mmp) == 2 assert all(os.path.isfile(hemi) for hemi in mmp) - assert all(hasattr(mmp, attr) for attr in ('lh', 'rh')) + assert all(hasattr(mmp, attr) for attr in ("L", "R")) def test_fetch_pauli2018(self, tmpdir): """Test fetching of Pauli2018 parcellations.""" pauli = datasets.fetch_pauli2018(data_dir=tmpdir, verbose=0) - assert all(hasattr(pauli, k) and os.path.isfile(pauli[k]) for k in - ['probabilistic', 'deterministic', 'info']) + assert all( + hasattr(pauli, k) and os.path.isfile(pauli[k]) + for k in ["probabilistic", "deterministic", "info"] + ) @pytest.mark.xfail def test_fetch_ye2020(self, tmpdir): @@ -118,8 +176,8 @@ def test_fetch_ye2020(self, tmpdir): def test_fetch_voneconomo(self, tmpdir): """Test fetching of von Economo parcellations.""" vek = datasets.fetch_voneconomo(data_dir=tmpdir, verbose=0) - assert all(hasattr(vek, k) and len(vek[k]) == 2 for k in ['gcs', 'ctab']) - assert isinstance(vek.get('info'), Path) + assert all(hasattr(vek, k) and len(vek[k]) == 2 for k in ["gcs", "ctab"]) + assert isinstance(vek.get("info"), Path) class TestFetchProject: @@ -128,83 +186,83 @@ class TestFetchProject: def test_fetch_vazquez_rodriguez2019(self, tmpdir): """Test fetching of Vazquez-Rodriguez2019 dataset.""" vazquez = datasets.fetch_vazquez_rodriguez2019(data_dir=tmpdir, verbose=0) - for k in ['rsquared', 'gradient']: + for k in ["rsquared", "gradient"]: assert hasattr(vazquez, k) assert isinstance(getattr(vazquez, k), np.ndarray) @pytest.mark.xfail def test_fetch_mirchi2018(self, tmpdir): """Test fetching of Mirchi2018 dataset.""" - assert False + X, Y = datasets.fetch_mirchi2018(data_dir=tmpdir, verbose=0) + assert isinstance(X, np.ndarray) + assert X.shape == (73, 198135) + assert isinstance(Y, np.ndarray) + assert Y.shape == (73, 13) - @pytest.mark.xfail def test_fetch_hansen_manynetworks(self, tmpdir): """Test fetching of Hansen et al., 2023 many-networks dataset.""" - # hansen = datasets.fetch_hansen_manynetworks(data_dir=tmpdir, verbose=0) + hansen = datasets.fetch_hansen_manynetworks(data_dir=tmpdir, verbose=0) + assert hansen.exists() # assert "cammoun033" in hansen # assert "gene" in hansen["cammoun033"] # assert isinstance(hansen["cammoun033"]["gene"], Path) - assert False - @pytest.mark.xfail def test_fetch_hansen_receptors(self, tmpdir): """Test fetching of Hansen et al., 2022 receptor dataset.""" - # hansen = datasets.fetch_hansen_receptors(data_dir=tmpdir, verbose=0) - assert False + hansen = datasets.fetch_hansen_receptors(data_dir=tmpdir, verbose=0) + assert hansen.exists() - @pytest.mark.xfail - def test_fetch_hansen_genecognition(self, tmpdir): + def test_fetch_hansen_genescognition(self, tmpdir): """Test fetching of Hansen et al., 2021 gene-cognition dataset.""" - # hansen = 
datasets.fetch_hansen_genecognition(data_dir=tmpdir, verbose=0) - assert False + hansen = datasets.fetch_hansen_genescognition(data_dir=tmpdir, verbose=0) + assert hansen.exists() - @pytest.mark.xfail - def test_fetch_hansen_brainstem(self, tmpdir): + def test_fetch_hansen_brainstemfc(self, tmpdir): """Test fetching of Hansen et al., 2024 brainstem dataset.""" - # hansen = datasets.fetch_hansen_brainstem(data_dir=tmpdir, verbose=0) - assert False + hansen = datasets.fetch_hansen_brainstemfc(data_dir=tmpdir, verbose=0) + assert hansen.exists() - @pytest.mark.xfail def test_fetch_shafiei_megfmrimapping(self, tmpdir): """Test fetching of Shafiei et al., 2022 & 2023 HCP-MEG dataset.""" - # shafiei = datasets.fetch_shafiei_megfmrimapping(data_dir=tmpdir, verbose=0) - assert False + shafiei = datasets.fetch_shafiei_megfmrimapping(data_dir=tmpdir, verbose=0) + assert shafiei.exists() - @pytest.mark.xfail def test_fetch_shafiei_megdynamics(self, tmpdir): """Test fetching of Shafiei et al., 2022 & 2023 HCP-MEG dataset.""" - # shafiei = datasets.fetch_shafiei_megdynamics(data_dir=tmpdir, verbose=0) - assert False + shafiei = datasets.fetch_shafiei_megdynamics(data_dir=tmpdir, verbose=0) + assert shafiei.exists() - @pytest.mark.xfail def test_fetch_suarez_mami(self, tmpdir): """Test fetching of Suarez et al., 2022 mami dataset.""" - # suarez = datasets.fetch_suarez_mami(data_dir=tmpdir, verbose=0) - assert False + suarez = datasets.fetch_suarez_mami(data_dir=tmpdir, verbose=0) + assert suarez.exists() - @pytest.mark.parametrize('dataset, expected', [ - ('celegans', ['conn', 'dist', 'labels', 'ref']), - ('drosophila', ['conn', 'coords', 'labels', 'networks', 'ref']), - ('human_func_scale033', ['conn', 'coords', 'labels', 'ref']), - ('human_func_scale060', ['conn', 'coords', 'labels', 'ref']), - ('human_func_scale125', ['conn', 'coords', 'labels', 'ref']), - ('human_func_scale250', ['conn', 'coords', 'labels', 'ref']), - ('human_func_scale500', ['conn', 'coords', 'labels', 'ref']), - ('human_struct_scale033', ['conn', 'coords', 'dist', 'labels', 'ref']), - ('human_struct_scale060', ['conn', 'coords', 'dist', 'labels', 'ref']), - ('human_struct_scale125', ['conn', 'coords', 'dist', 'labels', 'ref']), - ('human_struct_scale250', ['conn', 'coords', 'dist', 'labels', 'ref']), - ('human_struct_scale500', ['conn', 'coords', 'dist', 'labels', 'ref']), - ('macaque_markov', ['conn', 'dist', 'labels', 'ref']), - ('macaque_modha', ['conn', 'coords', 'dist', 'labels', 'ref']), - ('mouse', ['acronyms', 'conn', 'coords', 'dist', 'labels', 'ref']), - ('rat', ['conn', 'labels', 'ref']), - ]) + @pytest.mark.parametrize( + "dataset, expected", + [ + ("celegans", ["conn", "dist", "labels", "ref"]), + ("drosophila", ["conn", "coords", "labels", "networks", "ref"]), + ("human_func_scale033", ["conn", "coords", "labels", "ref"]), + ("human_func_scale060", ["conn", "coords", "labels", "ref"]), + ("human_func_scale125", ["conn", "coords", "labels", "ref"]), + ("human_func_scale250", ["conn", "coords", "labels", "ref"]), + ("human_func_scale500", ["conn", "coords", "labels", "ref"]), + ("human_struct_scale033", ["conn", "coords", "dist", "labels", "ref"]), + ("human_struct_scale060", ["conn", "coords", "dist", "labels", "ref"]), + ("human_struct_scale125", ["conn", "coords", "dist", "labels", "ref"]), + ("human_struct_scale250", ["conn", "coords", "dist", "labels", "ref"]), + ("human_struct_scale500", ["conn", "coords", "dist", "labels", "ref"]), + ("macaque_markov", ["conn", "dist", "labels", "ref"]), + ("macaque_modha", 
["conn", "coords", "dist", "labels", "ref"]), + ("mouse", ["acronyms", "conn", "coords", "dist", "labels", "ref"]), + ("rat", ["conn", "labels", "ref"]), + ], + ) def test_fetch_famous_gmat(self, tmpdir, dataset, expected): """Test fetching of famous G.mat datasets.""" connectome = datasets.fetch_famous_gmat(dataset, data_dir=tmpdir, verbose=0) expected.remove("ref") for key in expected: - assert (key in connectome) - assert isinstance(connectome[key], str if key == 'ref' else np.ndarray) + assert key in connectome + assert isinstance(connectome[key], str if key == "ref" else np.ndarray) From 0ed21fee2a422cc5fad9c4079c9b142b198013ee Mon Sep 17 00:00:00 2001 From: Zhen-Qi Liu Date: Fri, 15 Nov 2024 20:26:39 -0500 Subject: [PATCH 29/32] Rename datasets and update URL handling for GitHub archives --- docs/api.rst | 2 +- netneurotools/datasets/__init__.py | 4 +- netneurotools/datasets/datasets.json | 52 ++++++++++++------------ netneurotools/datasets/datasets_utils.py | 3 ++ netneurotools/datasets/fetch_project.py | 8 ++-- netneurotools/datasets/fetch_template.py | 2 +- netneurotools/datasets/references.json | 4 +- 7 files changed, 39 insertions(+), 36 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index f6cfcca..81fa2a3 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -59,7 +59,7 @@ To download project-related data fetch_hansen_manynetworks fetch_hansen_receptors fetch_hansen_genecognition - fetch_hansen_brainstem + fetch_hansen_brainstemfc fetch_shafiei_megfmrimapping fetch_shafiei_megdynamics fetch_suarez_mami diff --git a/netneurotools/datasets/__init__.py b/netneurotools/datasets/__init__.py index f7a0cf3..b14674e 100644 --- a/netneurotools/datasets/__init__.py +++ b/netneurotools/datasets/__init__.py @@ -24,7 +24,7 @@ fetch_vazquez_rodriguez2019, fetch_mirchi2018, # new projects fetch_hansen_manynetworks, fetch_hansen_receptors, - fetch_hansen_genecognition, fetch_hansen_brainstem, + fetch_hansen_genescognition, fetch_hansen_brainstemfc, fetch_shafiei_megfmrimapping, fetch_shafiei_megdynamics, fetch_suarez_mami, # example data @@ -51,7 +51,7 @@ # fetch_project 'fetch_vazquez_rodriguez2019', 'fetch_mirchi2018', 'fetch_hansen_manynetworks', 'fetch_hansen_receptors', - 'fetch_hansen_genecognition', 'fetch_hansen_brainstem', + 'fetch_hansen_genescognition', 'fetch_hansen_brainstemfc', 'fetch_shafiei_megfmrimapping', 'fetch_shafiei_megdynamics', 'fetch_suarez_mami', 'fetch_famous_gmat', diff --git a/netneurotools/datasets/datasets.json b/netneurotools/datasets/datasets.json index 1beb965..c6011a1 100644 --- a/netneurotools/datasets/datasets.json +++ b/netneurotools/datasets/datasets.json @@ -397,63 +397,63 @@ "rel-path": "ds-hansen_manynetworks" }, "ds-hansen_receptors": { - "url-type": "github-release", + "url-type": "github-archive", "url": [ "netneurolab", "hansen_receptors", - "v1.0.0" + "f8b41da92a733f99368c1f39d4959731aa1316c1" ], - "md5": "", + "md5": "8d7472135ed1ecf18899ba6f680526c8", "uncompress": true, - "rename-folder": "hansen_receptors-1.0.0", + "rename-folder": "hansen_receptors-f8b41da92a733f99368c1f39d4959731aa1316c1", "rel-path": "ds-hansen_receptors" }, - "ds-hansen_genecognition": { - "url-type": "github-release", + "ds-hansen_genescognition": { + "url-type": "github-archive", "url": [ "netneurolab", - "hansen_genecognition", - "v1.0.0" + "hansen_genescognition", + "bdffc4b22c08bc69530b6b67baa7875ab1b82f77" ], - "md5": "", + "md5": "3f4108cb60234944142301df421e59ef", "uncompress": true, - "rename-folder": "hansen_genecognition-1.0.0", - "rel-path": 
"ds-hansen_genecognition" + "rename-folder": "hansen_genescognition-bdffc4b22c08bc69530b6b67baa7875ab1b82f77", + "rel-path": "ds-hansen_genescognition" }, - "ds-hansen_brainstem": { - "url-type": "github-release", + "ds-hansen_brainstemfc": { + "url-type": "github-archive", "url": [ "netneurolab", - "hansen_brainstem", - "v1.0.0" + "hansen_brainstemfc", + "522a60f2736f09b5c29cab9b9ffd174e4006e6af" ], - "md5": "", + "md5": "2b330934b5946d510f94040993b337d2", "uncompress": true, - "rename-folder": "hansen_brainstem-1.0.0", - "rel-path": "ds-hansen_brainstem" + "rename-folder": "hansen_brainstemfc-522a60f2736f09b5c29cab9b9ffd174e4006e6af", + "rel-path": "ds-hansen_brainstemfc" }, "ds-shafiei_megfmrimapping": { - "url-type": "github-release", + "url-type": "github-archive", "url": [ "netneurolab", "shafiei_megfmrimapping", - "v1.0.0" + "ba33fe8f3313f582d9422edf93d8f1f13309f8e1" ], - "md5": "", + "md5": "ddb83942e178e032b5787dba60025120", "uncompress": true, - "rename-folder": "shafiei_megfmrimapping-1.0.0", + "rename-folder": "shafiei_megfmrimapping-ba33fe8f3313f582d9422edf93d8f1f13309f8e1", "rel-path": "ds-shafiei_megfmrimapping" }, "ds-shafiei_megdynamics": { - "url-type": "github-release", + "url-type": "github-archive", "url": [ "netneurolab", "shafiei_megdynamics", - "v1.0.0" + "9c8a2a25ba3da78c27539c821be3eba4b0ac84e0" ], - "md5": "", + "md5": "c8a3a3575e0a5e2b9deb11c4f596cf5c", "uncompress": true, - "rename-folder": "shafiei_megdynamics-1.0.0", + "rename-folder": "shafiei_megdynamics-9c8a2a25ba3da78c27539c821be3eba4b0ac84e0", "rel-path": "ds-shafiei_megdynamics" }, "ds-suarez_mami": { diff --git a/netneurotools/datasets/datasets_utils.py b/netneurotools/datasets/datasets_utils.py index 2c39ca8..8fd4ee6 100644 --- a/netneurotools/datasets/datasets_utils.py +++ b/netneurotools/datasets/datasets_utils.py @@ -63,12 +63,15 @@ def _decode_url(url_type, url): """ OSF_API = "https://files.osf.io/v1/resources/{}/providers/osfstorage/{}" GITHUB_RELEASE_API = "https://github.com/{}/{}/archive/refs/tags/{}.tar.gz" + GITHUB_ARCHIVE_API = "https://github.com/{}/{}/archive/{}.tar.gz" ZENODO_FILE_API = "https://zenodo.org/record/{}/files/{}?download=1" if url_type == "osf": out_url = OSF_API.format(*url) elif url_type == "github-release": out_url = GITHUB_RELEASE_API.format(*url) + elif url_type == "github-archive": + out_url = GITHUB_ARCHIVE_API.format(*url) elif url_type == "zenodo-file": out_url = ZENODO_FILE_API.format(*url) else: diff --git a/netneurotools/datasets/fetch_project.py b/netneurotools/datasets/fetch_project.py index 08e04b2..4462307 100644 --- a/netneurotools/datasets/fetch_project.py +++ b/netneurotools/datasets/fetch_project.py @@ -218,7 +218,7 @@ def fetch_hansen_receptors(force=False, data_dir=None, verbose=1): return fetched -def fetch_hansen_genecognition(force=False, data_dir=None, verbose=1): +def fetch_hansen_genescognition(force=False, data_dir=None, verbose=1): """Download files from Hansen et al., 2021, Nature Human Behaviour. This dataset contains @@ -246,7 +246,7 @@ def fetch_hansen_genecognition(force=False, data_dir=None, verbose=1): ---------- .. 
[1] """ - dataset_name = "ds-hansen_genecognition" + dataset_name = "ds-hansen_genescognition" _get_reference_info(dataset_name, verbose=verbose) fetched = fetch_file(dataset_name, force=force, data_dir=data_dir, verbose=verbose) @@ -254,7 +254,7 @@ def fetch_hansen_genecognition(force=False, data_dir=None, verbose=1): return fetched -def fetch_hansen_brainstem(force=False, data_dir=None, verbose=1): +def fetch_hansen_brainstemfc(force=False, data_dir=None, verbose=1): """Download files from Hansen et al., 2024. This dataset contains @@ -282,7 +282,7 @@ def fetch_hansen_brainstem(force=False, data_dir=None, verbose=1): ---------- .. [1] """ - dataset_name = "ds-hansen_brainstem" + dataset_name = "ds-hansen_brainstemfc" _get_reference_info(dataset_name, verbose=verbose) fetched = fetch_file(dataset_name, force=force, data_dir=data_dir, verbose=verbose) diff --git a/netneurotools/datasets/fetch_template.py b/netneurotools/datasets/fetch_template.py index 9081efd..9640ccc 100644 --- a/netneurotools/datasets/fetch_template.py +++ b/netneurotools/datasets/fetch_template.py @@ -330,7 +330,7 @@ def fetch_fslr_curated(version="fslr32k", force=False, data_dir=None, verbose=1) "sulc", "vaavg", ] - if version == "fslr4k": + if version in ["fslr4k", "fslr8k"]: keys.remove("veryinflated") keys_suffix = { "midthickness": "midthickness.surf", diff --git a/netneurotools/datasets/references.json b/netneurotools/datasets/references.json index d4ec5d8..fe51a99 100644 --- a/netneurotools/datasets/references.json +++ b/netneurotools/datasets/references.json @@ -281,7 +281,7 @@ } ] }, - "ds-hansen_genecognition": { + "ds-hansen_genescognition": { "primary": [ { "citation": "", @@ -289,7 +289,7 @@ } ] }, - "ds-hansen_brainstem": { + "ds-hansen_brainstemfc": { "primary": [ { "citation": "", From 068a07c06368e406ca2e04fea2acb2d5b4b9f20b Mon Sep 17 00:00:00 2001 From: Zhen-Qi Liu Date: Mon, 18 Nov 2024 12:56:38 -0500 Subject: [PATCH 30/32] Fix typo in API documentation --- docs/api.rst | 2 +- ...mirchi_2018.py => wip_plot_mirchi_2018.py} | 0 netneurotools/networks/randomize.py | 326 ++++++++++-------- 3 files changed, 183 insertions(+), 145 deletions(-) rename examples/{plot_mirchi_2018.py => wip_plot_mirchi_2018.py} (100%) diff --git a/docs/api.rst b/docs/api.rst index 81fa2a3..1a40097 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -58,7 +58,7 @@ To download project-related data fetch_mirchi2018 fetch_hansen_manynetworks fetch_hansen_receptors - fetch_hansen_genecognition + fetch_hansen_genescognition fetch_hansen_brainstemfc fetch_shafiei_megfmrimapping fetch_shafiei_megdynamics diff --git a/examples/plot_mirchi_2018.py b/examples/wip_plot_mirchi_2018.py similarity index 100% rename from examples/plot_mirchi_2018.py rename to examples/wip_plot_mirchi_2018.py diff --git a/netneurotools/networks/randomize.py b/netneurotools/networks/randomize.py index a91f9ec..9778ea7 100644 --- a/netneurotools/networks/randomize.py +++ b/netneurotools/networks/randomize.py @@ -3,12 +3,11 @@ import bct import numpy as np from tqdm import tqdm -from sklearn.utils.validation import ( - check_random_state -) +from sklearn.utils.validation import check_random_state try: from numba import njit + use_numba = True except ImportError: use_numba = False @@ -65,7 +64,7 @@ def randmio_und(W, itr): # flip edge c-d with 50% probability # to explore all potential rewirings - if np.random.random() > .5: + if np.random.random() > 0.5: i[e2], j[e2] = d, c c, d = d, c @@ -101,9 +100,9 @@ def randmio_und(W, itr): randmio_und = njit(randmio_und) -def 
match_length_degree_distribution(W, D, nbins=10, nswap=1000, - replacement=False, weighted=True, - seed=None): +def match_length_degree_distribution( + W, D, nbins=10, nswap=1000, replacement=False, weighted=True, seed=None +): """ Generate degree- and edge length-preserving surrogate connectomes. @@ -114,15 +113,15 @@ def match_length_degree_distribution(W, D, nbins=10, nswap=1000, D : (N, N) array-like symmetric distance matrix. nbins : int - number of distance bins (edge length matrix is performed by swapping - connections in the same bin). Default = 10. + number of distance bins (edge length matrix is performed by swapping connections + in the same bin). Default = 10. nswap : int - total number of edge swaps to perform. Recommended = nnodes * 20 + total number of edge swaps to perform. Recommended = nnodes * 20. Default = 1000. replacement : bool, optional - if True all the edges are available for swapping. Default= False + if True all the edges are available for swapping. Default = False. weighted : bool, optional - Whether to return weighted rewired connectivity matrix. Default = True + if True the function returns a weighted matrix. Default = True. seed : float, optional Random seed. Default = None @@ -148,7 +147,6 @@ def match_length_degree_distribution(W, D, nbins=10, nswap=1000, --------- Betzel, R. F., Bassett, D. S. (2018) Specificity and robustness of long-distance connections in weighted, interareal connectomes. PNAS. - """ rs = check_random_state(seed) N = len(W) @@ -170,7 +168,7 @@ def match_length_degree_distribution(W, D, nbins=10, nswap=1000, nr = 0 newB = np.copy(B) - while ((len(cn_x) >= 2) & (nr < nswap)): + while (len(cn_x) >= 2) & (nr < nswap): # choose randomly the edge to be rewired r = rs.randint(len(cn_x)) n_x, n_y = cn_x[r], cn_y[r] @@ -189,7 +187,8 @@ def match_length_degree_distribution(W, D, nbins=10, nswap=1000, # (ops1_x, ops1_y) such that # L(n_x,n_y) = L(n_x, ops1_x) & L(ops1_x,ops1_y) = L(n_y, ops1_y) index = (L[n_x, n_y] == L[n_x, ops1_x]) & ( - L[ops1_x, ops1_y] == L[n_y, ops1_y]) + L[ops1_x, ops1_y] == L[n_y, ops1_y] + ) if len(np.where(index)[0]) == 0: cn_x = np.delete(cn_x, r) cn_y = np.delete(cn_y, r) @@ -197,11 +196,12 @@ def match_length_degree_distribution(W, D, nbins=10, nswap=1000, else: ops2_x, ops2_y = ops1_x[index], ops1_y[index] # options of edges that didn't exist before - index = [(newB[min(n_x, ops2_x[i])][max(n_x, ops2_x[i])] == 0) - & (newB[min(n_y, ops2_y[i])][max(n_y, - ops2_y[i])] == 0) - for i in range(len(ops2_x))] - if (len(np.where(index)[0]) == 0): + index = [ + (newB[min(n_x, ops2_x[i])][max(n_x, ops2_x[i])] == 0) + & (newB[min(n_y, ops2_y[i])][max(n_y, ops2_y[i])] == 0) + for i in range(len(ops2_x)) + ] + if len(np.where(index)[0]) == 0: cn_x = np.delete(cn_x, r) cn_y = np.delete(cn_y, r) @@ -225,8 +225,7 @@ def match_length_degree_distribution(W, D, nbins=10, nswap=1000, if replacement: cn_x[r], cn_y[r] = min(n_x, nn_x), max(n_x, nn_x) index = np.where((cn_x == nn_x) & (cn_y == nn_y))[0] - cn_x[index], cn_y[index] = min( - n_y, nn_y), max(n_y, nn_y) + cn_x[index], cn_y[index] = min(n_y, nn_y), max(n_y, nn_y) # rewire without replacement else: cn_x = np.delete(cn_x, r) @@ -246,13 +245,15 @@ def match_length_degree_distribution(W, D, nbins=10, nswap=1000, if len(np.where(B != 0)[0]) != len(np.where(newB != 0)[0]): print( f"ERROR --- number of edges changed, \ - B:{len(np.where(B != 0)[0])}, newB:{len(np.where(newB != 0)[0])}") + B:{len(np.where(B != 0)[0])}, newB:{len(np.where(newB != 0)[0])}" + ) # check that the degree of 
the nodes it's the same for i in range(N): if np.sum(B[i]) != np.sum(newB[i]): print( f"ERROR --- node {i} changed k by: \ - {np.sum(B[i]) - np.sum(newB[i])}") + {np.sum(B[i]) - np.sum(newB[i])}" + ) newW = np.zeros((N, N)) if weighted: @@ -278,12 +279,20 @@ def match_length_degree_distribution(W, D, nbins=10, nswap=1000, return newB, newW, nr -def strength_preserving_rand_sa(A, rewiring_iter=10, - nstage=100, niter=10000, - temp=1000, frac=0.5, - energy_type='sse', energy_func=None, - R=None, connected=None, - verbose=False, seed=None): +def strength_preserving_rand_sa( + A, + rewiring_iter=10, + nstage=100, + niter=10000, + temp=1000, + frac=0.5, + energy_type="sse", + energy_func=None, + R=None, + connected=None, + verbose=False, + seed=None, +): """ Strength-preserving network randomization using simulated annealing. @@ -364,12 +373,11 @@ def strength_preserving_rand_sa(A, rewiring_iter=10, try: A = np.asarray(A) except TypeError as err: - msg = ('A must be array_like. Received: {}.'.format(type(A))) + msg = "A must be array_like. Received: {}.".format(type(A)) raise TypeError(msg) from err if frac > 1 or frac <= 0: - msg = ('frac must be between 0 and 1. ' - 'Received: {}.'.format(frac)) + msg = "frac must be between 0 and 1. " "Received: {}.".format(frac) raise ValueError(msg) rs = check_random_state(seed) @@ -396,32 +404,32 @@ def strength_preserving_rand_sa(A, rewiring_iter=10, if energy_func is not None: energy = energy_func(s, sb) - elif energy_type == 'sse': - energy = np.sum((s - sb)**2) - elif energy_type == 'max': + elif energy_type == "sse": + energy = np.sum((s - sb) ** 2) + elif energy_type == "max": energy = np.max(np.abs(s - sb)) - elif energy_type == 'mae': + elif energy_type == "mae": energy = np.mean(np.abs(s - sb)) - elif energy_type == 'mse': - energy = np.mean((s - sb)**2) - elif energy_type == 'rmse': - energy = np.sqrt(np.mean((s - sb)**2)) + elif energy_type == "mse": + energy = np.mean((s - sb) ** 2) + elif energy_type == "rmse": + energy = np.sqrt(np.mean((s - sb) ** 2)) else: - msg = ("energy_type must be one of 'sse', 'max', " - "'mae', 'mse', or 'rmse'. Received: {}.".format(energy_type)) + msg = ( + "energy_type must be one of 'sse', 'max', " + "'mae', 'mse', or 'rmse'. 
Received: {}.".format(energy_type) + ) raise ValueError(msg) energymin = energy wtsmin = wts.copy() if verbose: - print('\ninitial energy {:.5f}'.format(energy)) - - for istage in tqdm(range(nstage), desc='annealing progress'): + print("\ninitial energy {:.5f}".format(energy)) + for istage in tqdm(range(nstage), desc="annealing progress"): naccept = 0 for _ in range(niter): - # permutation e1 = rs.randint(m) e2 = rs.randint(m) @@ -435,25 +443,28 @@ def strength_preserving_rand_sa(A, rewiring_iter=10, if energy_func is not None: energy_prime = energy_func(sb_prime, s) - elif energy_type == 'sse': - energy_prime = np.sum((sb_prime - s)**2) - elif energy_type == 'max': + elif energy_type == "sse": + energy_prime = np.sum((sb_prime - s) ** 2) + elif energy_type == "max": energy_prime = np.max(np.abs(sb_prime - s)) - elif energy_type == 'mae': + elif energy_type == "mae": energy_prime = np.mean(np.abs(sb_prime - s)) - elif energy_type == 'mse': - energy_prime = np.mean((sb_prime - s)**2) - elif energy_type == 'rmse': - energy_prime = np.sqrt(np.mean((sb_prime - s)**2)) + elif energy_type == "mse": + energy_prime = np.mean((sb_prime - s) ** 2) + elif energy_type == "rmse": + energy_prime = np.sqrt(np.mean((sb_prime - s) ** 2)) else: - msg = ("energy_type must be one of 'sse', 'max', " - "'mae', 'mse', or 'rmse'. " - "Received: {}.".format(energy_type)) + msg = ( + "energy_type must be one of 'sse', 'max', " + "'mae', 'mse', or 'rmse'. " + "Received: {}.".format(energy_type) + ) raise ValueError(msg) # permutation acceptance criterion - if (energy_prime < energy or - rs.rand() < np.exp(-(energy_prime - energy) / temp)): + if energy_prime < energy or rs.rand() < np.exp( + -(energy_prime - energy) / temp + ): sb = sb_prime.copy() wts[[e1, e2]] = wts[[e2, e1]] energy = energy_prime @@ -465,10 +476,12 @@ def strength_preserving_rand_sa(A, rewiring_iter=10, # temperature update temp = temp * frac if verbose: - print('\nstage {:d}, temp {:.5f}, best energy {:.5f}, ' - 'frac of accepted moves {:.3f}'.format(istage, temp, - energymin, - naccept / niter)) + print( + "\nstage {:d}, temp {:.5f}, best energy {:.5f}, " + "frac of accepted moves {:.3f}".format( + istage, temp, energymin, naccept / niter + ) + ) B = np.zeros((n, n)) B[(u, v)] = wtsmin @@ -477,11 +490,18 @@ def strength_preserving_rand_sa(A, rewiring_iter=10, return B, energymin -def strength_preserving_rand_sa_mse_opt(A, rewiring_iter=10, - nstage=100, niter=10000, - temp=1000, frac=0.5, - R=None, connected=None, - verbose=False, seed=None): +def strength_preserving_rand_sa_mse_opt( + A, + rewiring_iter=10, + nstage=100, + niter=10000, + temp=1000, + frac=0.5, + R=None, + connected=None, + verbose=False, + seed=None, +): """ Strength-preserving network randomization using simulated annealing. @@ -549,12 +569,11 @@ def strength_preserving_rand_sa_mse_opt(A, rewiring_iter=10, try: A = np.asarray(A) except TypeError as err: - msg = ('A must be array_like. Received: {}.'.format(type(A))) + msg = "A must be array_like. Received: {}.".format(type(A)) raise TypeError(msg) from err if frac > 1 or frac <= 0: - msg = ('frac must be between 0 and 1. ' - 'Received: {}.'.format(frac)) + msg = "frac must be between 0 and 1. 
" "Received: {}.".format(frac) raise ValueError(msg) rs = check_random_state(seed) @@ -579,40 +598,39 @@ def strength_preserving_rand_sa_mse_opt(A, rewiring_iter=10, m = len(wts) sb = np.sum(B, axis=1) # strengths of B - energy = np.mean((s - sb)**2) + energy = np.mean((s - sb) ** 2) energymin = energy wtsmin = wts.copy() if verbose: - print('\ninitial energy {:.5f}'.format(energy)) + print("\ninitial energy {:.5f}".format(energy)) - for istage in tqdm(range(nstage), desc='annealing progress'): + for istage in tqdm(range(nstage), desc="annealing progress"): naccept = 0 - for (e1, e2), prob in zip(rs.randint(m, size=(niter, 2)), - rs.rand(niter) - ): - + for (e1, e2), prob in zip(rs.randint(m, size=(niter, 2)), rs.rand(niter)): # permutation a, b, c, d = u[e1], v[e1], u[e2], v[e2] wts_change = wts[e1] - wts[e2] - delta_energy = (2 * wts_change * - (2 * wts_change + - (s[a] - sb[a]) + - (s[b] - sb[b]) - - (s[c] - sb[c]) - - (s[d] - sb[d]) - ) - ) / n + delta_energy = ( + 2 + * wts_change + * ( + 2 * wts_change + + (s[a] - sb[a]) + + (s[b] - sb[b]) + - (s[c] - sb[c]) + - (s[d] - sb[d]) + ) + ) / n # permutation acceptance criterion - if (delta_energy < 0 or prob < np.e**(-(delta_energy) / temp)): - + if delta_energy < 0 or prob < np.e ** (-(delta_energy) / temp): sb[[a, b]] -= wts_change sb[[c, d]] += wts_change wts[[e1, e2]] = wts[[e2, e1]] - energy = np.mean((sb - s)**2) + energy = np.mean((sb - s) ** 2) if energy < energymin: energymin = energy @@ -622,10 +640,12 @@ def strength_preserving_rand_sa_mse_opt(A, rewiring_iter=10, # temperature update temp = temp * frac if verbose: - print('\nstage {:d}, temp {:.5f}, best energy {:.5f}, ' - 'frac of accepted moves {:.3f}'.format(istage, temp, - energymin, - naccept / niter)) + print( + "\nstage {:d}, temp {:.5f}, best energy {:.5f}, " + "frac of accepted moves {:.3f}".format( + istage, temp, energymin, naccept / niter + ) + ) B = np.zeros((n, n)) B[(u, v)] = wtsmin @@ -634,12 +654,19 @@ def strength_preserving_rand_sa_mse_opt(A, rewiring_iter=10, return B, energymin -def strength_preserving_rand_sa_dir(A, rewiring_iter=10, - nstage=100, niter=10000, - temp=1000, frac=0.5, - energy_type='sse', energy_func=None, - connected=True, verbose=False, - seed=None): +def strength_preserving_rand_sa_dir( + A, + rewiring_iter=10, + nstage=100, + niter=10000, + temp=1000, + frac=0.5, + energy_type="sse", + energy_func=None, + connected=True, + verbose=False, + seed=None, +): """ Strength-preserving network randomization using simulated annealing. @@ -716,12 +743,11 @@ def strength_preserving_rand_sa_dir(A, rewiring_iter=10, try: A = np.asarray(A) except TypeError as err: - msg = ('A must be array_like. Received: {}.'.format(type(A))) + msg = "A must be array_like. Received: {}.".format(type(A)) raise TypeError(msg) from err if frac > 1 or frac <= 0: - msg = ('frac must be between 0 and 1. ' - 'Received: {}.'.format(frac)) + msg = "frac must be between 0 and 1. 
" "Received: {}.".format(frac) raise ValueError(msg) rs = check_random_state(seed) @@ -744,33 +770,34 @@ def strength_preserving_rand_sa_dir(A, rewiring_iter=10, if energy_func is not None: energy = energy_func(s_in, sb_in) + energy_func(s_out, sb_out) - elif energy_type == 'sse': - energy = np.sum((s_in - sb_in)**2) + np.sum((s_out - sb_out)**2) - elif energy_type == 'max': + elif energy_type == "sse": + energy = np.sum((s_in - sb_in) ** 2) + np.sum((s_out - sb_out) ** 2) + elif energy_type == "max": energy = np.max(np.abs(s_in - sb_in)) + np.max(np.abs(s_out - sb_out)) - elif energy_type == 'mae': + elif energy_type == "mae": energy = np.mean(np.abs(s_in - sb_in)) + np.mean(np.abs(s_out - sb_out)) - elif energy_type == 'mse': - energy = np.mean((s_in - sb_in)**2) + np.mean((s_out - sb_out)**2) - elif energy_type == 'rmse': - energy = (np.sqrt(np.mean((s_in - sb_in)**2)) + - np.sqrt(np.mean((s_out - sb_out)**2))) + elif energy_type == "mse": + energy = np.mean((s_in - sb_in) ** 2) + np.mean((s_out - sb_out) ** 2) + elif energy_type == "rmse": + energy = np.sqrt(np.mean((s_in - sb_in) ** 2)) + np.sqrt( + np.mean((s_out - sb_out) ** 2) + ) else: - msg = ("energy_type must be one of 'sse', 'max', " - "'mae', 'mse', or 'rmse'. Received: {}.".format(energy_type)) + msg = ( + "energy_type must be one of 'sse', 'max', " + "'mae', 'mse', or 'rmse'. Received: {}.".format(energy_type) + ) raise ValueError(msg) energymin = energy wtsmin = wts.copy() if verbose: - print('\ninitial energy {:.5f}'.format(energy)) - - for istage in tqdm(range(nstage), desc='annealing progress'): + print("\ninitial energy {:.5f}".format(energy)) + for istage in tqdm(range(nstage), desc="annealing progress"): naccept = 0 for _ in range(niter): - # permutation e1 = rs.randint(m) e2 = rs.randint(m) @@ -786,32 +813,41 @@ def strength_preserving_rand_sa_dir(A, rewiring_iter=10, sb_prime_out[c] = sb_prime_out[c] - wts[e2] + wts[e1] if energy_func is not None: - energy_prime = (energy_func(sb_prime_in, s_in) + - energy_func(sb_prime_out, s_out)) - elif energy_type == 'sse': - energy_prime = (np.sum((sb_prime_in - s_in)**2) + - np.sum((sb_prime_out - s_out)**2)) - elif energy_type == 'max': - energy_prime = (np.max(np.abs(sb_prime_in - s_in)) + - np.max(np.abs(sb_prime_out - s_out))) - elif energy_type == 'mae': - energy_prime = (np.mean(np.abs(sb_prime_in - s_in)) + - np.mean(np.abs(sb_prime_out - s_out))) - elif energy_type == 'mse': - energy_prime = (np.mean((sb_prime_in - s_in)**2) + - np.mean((sb_prime_out - s_out)**2)) - elif energy_type == 'rmse': - energy_prime = (np.sqrt(np.mean((sb_prime_in - s_in)**2)) + - np.sqrt(np.mean((sb_prime_out - s_out)**2))) + energy_prime = energy_func(sb_prime_in, s_in) + energy_func( + sb_prime_out, s_out + ) + elif energy_type == "sse": + energy_prime = np.sum((sb_prime_in - s_in) ** 2) + np.sum( + (sb_prime_out - s_out) ** 2 + ) + elif energy_type == "max": + energy_prime = np.max(np.abs(sb_prime_in - s_in)) + np.max( + np.abs(sb_prime_out - s_out) + ) + elif energy_type == "mae": + energy_prime = np.mean(np.abs(sb_prime_in - s_in)) + np.mean( + np.abs(sb_prime_out - s_out) + ) + elif energy_type == "mse": + energy_prime = np.mean((sb_prime_in - s_in) ** 2) + np.mean( + (sb_prime_out - s_out) ** 2 + ) + elif energy_type == "rmse": + energy_prime = np.sqrt(np.mean((sb_prime_in - s_in) ** 2)) + np.sqrt( + np.mean((sb_prime_out - s_out) ** 2) + ) else: - msg = ("energy_type must be one of 'sse', 'max', " - "'mae', 'mse', or 'rmse'. 
" - "Received: {}.".format(energy_type)) + msg = ( + "energy_type must be one of 'sse', 'max', " + "'mae', 'mse', or 'rmse'. " + "Received: {}.".format(energy_type) + ) raise ValueError(msg) # permutation acceptance criterion - if (energy_prime < energy or - rs.rand() < np.exp(-(energy_prime - energy) / temp)): + if energy_prime < energy or rs.rand() < np.exp( + -(energy_prime - energy) / temp + ): sb_in = sb_prime_in.copy() sb_out = sb_prime_out.copy() wts[[e1, e2]] = wts[[e2, e1]] @@ -824,10 +860,12 @@ def strength_preserving_rand_sa_dir(A, rewiring_iter=10, # temperature update temp = temp * frac if verbose: - print('\nstage {:d}, temp {:.5f}, best energy {:.5f}, ' - 'frac of accepted moves {:.3f}'.format(istage, temp, - energymin, - naccept / niter)) + print( + "\nstage {:d}, temp {:.5f}, best energy {:.5f}, " + "frac of accepted moves {:.3f}".format( + istage, temp, energymin, naccept / niter + ) + ) B = np.zeros((n, n)) B[(u, v)] = wtsmin From 0d47815f15f720006ab49a87cbd3bd6f717e0482 Mon Sep 17 00:00:00 2001 From: Zhen-Qi Liu Date: Mon, 18 Nov 2024 14:29:55 -0500 Subject: [PATCH 31/32] Update documentation to reflect transition from PySurfer to Pyvista for surface plotting --- docs/api.rst | 14 ++++++------- docs/installation.rst | 47 ++++++++++++++++++++++++++++++++++++------- 2 files changed, 47 insertions(+), 14 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index 1a40097..a537069 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -120,24 +120,24 @@ Convenient functions .. currentmodule:: netneurotools.plotting -PySurfer +Pyvista .. autosummary:: :template: function.rst :toctree: generated/ - plot_conte69 - plot_fslr - plot_fsaverage - plot_fsvertex + pv_plot_surface -Pyvista +PySurfer (deprecated) .. autosummary:: :template: function.rst :toctree: generated/ - pv_plot_surface + plot_conte69 + plot_fslr + plot_fsaverage + plot_fsvertex matplotlib diff --git a/docs/installation.rst b/docs/installation.rst index 79f91de..0e4f129 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -24,15 +24,47 @@ Alternatively, you can install ``netneurotools`` directly from PyPi with: pip install netneurotools + Optional installation for surface plotting ------------------------------------------ -In order to use surface plotting functionality like -:py:func:`netneurotools.plotting.plot_fsaverage`, you will need a working -``vtk``/``mayavi``/``pysurfer`` installation. These can generally be installed -with the following command: +Pyvista +~~~~~~~ + +This is the new plotting library used in the package. This will allow you to use functions like + +- :py:func:`netneurotools.plotting.pv_plot_surface` + +You will need a working ``pyvista`` installation. +Generally, we recommend using a clean conda environment, and install Pyvista using the following commands: + +.. code-block:: bash + + conda create -n plotting python=3.12 + conda activate plotting + conda install -c conda-forge pyvista + # if you are using Jupyter notebooks + conda install -c conda-forge jupyterlab trame trame-vtk trame-vuetify trame-jupyter-extension + +If you meet any issues, please refer to the +`detailed installation guide `_. -.. code-block: bash + +Pysurfer (deprecated) +~~~~~~~~~~~~~~~~~~~~~ + +This is the old plotting library used in the package. It is now deprecated in favor of Pyvista. 
+This will allow you to use functions like + +- :py:func:`netneurotools.plotting.plot_fsaverage` +- :py:func:`netneurotools.plotting.plot_fslr` +- :py:func:`netneurotools.plotting.plot_conte69` +- :py:func:`netneurotools.plotting.plot_fsvertex` + +You will need a working ``vtk``/``mayavi``/``pysurfer`` installation. +These can generally be installed with the following command: + +.. code-block:: bash pip install vtk mayavi pysurfer @@ -94,8 +126,9 @@ installation, there is generally no need to follow these instructions! - Install from source ``pip install git+https://github.com/netneurolab/netneurotools.git`` -Troubleshooting -~~~~~~~~~~~~~~~ + +Here are some common issues and their solutions: + - Error related to ``from tvtk.vtk_module import VTK_MAJOR_VERSION`` From 7e55153d58f38a6d8be06839c4e0a9b00444f325 Mon Sep 17 00:00:00 2001 From: Zhen-Qi Liu Date: Mon, 18 Nov 2024 14:30:03 -0500 Subject: [PATCH 32/32] Add transforms module for datasets --- netneurotools/datasets/transforms.py | 1 + 1 file changed, 1 insertion(+) create mode 100644 netneurotools/datasets/transforms.py diff --git a/netneurotools/datasets/transforms.py b/netneurotools/datasets/transforms.py new file mode 100644 index 0000000..120a8b0 --- /dev/null +++ b/netneurotools/datasets/transforms.py @@ -0,0 +1 @@ +"""Transforms for datasets."""
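
A minimal usage sketch for the new ``pv_plot_surface`` plotter introduced in patches 24-26. The fsLR-32k template choice, the random vertex data, and the output filename are illustrative assumptions, not part of the patches; given a working ``pyvista`` install, a call along these lines is expected to render both hemispheres off-screen and save a screenshot:

.. code-block:: python

    import nibabel as nib
    import numpy as np

    from netneurotools.datasets import fetch_fslr_curated
    from netneurotools.plotting import pv_plot_surface

    # Hypothetical input: one random value per fsLR-32k vertex, per hemisphere.
    surfs = fetch_fslr_curated(version="fslr32k", verbose=0)
    n_lh = nib.load(surfs["inflated"].L).agg_data()[0].shape[0]
    n_rh = nib.load(surfs["inflated"].R).agg_data()[0].shape[0]
    rng = np.random.default_rng(0)
    vertex_data = (rng.standard_normal(n_lh), rng.standard_normal(n_rh))

    # Default 2x2 layout, medial wall masked, rendered off-screen to a PNG.
    pl = pv_plot_surface(
        vertex_data,
        template="fslr32k",
        surf="inflated",
        hemi="both",
        cbar_title="random values",
        show_plot=False,
        save_fig="pv_surface_example.png",  # hypothetical output path
    )

Passing a single concatenated array (left hemisphere followed by right) instead of the two-tuple is also supported; the function splits it internally at the left-hemisphere vertex count.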
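
The ``github-archive`` entries added to ``datasets.json`` in patch 29 pin each project dataset to a specific commit rather than a release tag. A small sketch of how ``_decode_url`` in ``datasets_utils.py`` resolves such an entry; importing the private helper directly and assuming it returns the formatted URL are conveniences for illustration only:

.. code-block:: python

    from netneurotools.datasets.datasets_utils import _decode_url

    # A "github-archive" entry is an (owner, repo, commit) triple mapping to
    # https://github.com/{owner}/{repo}/archive/{commit}.tar.gz
    url = _decode_url(
        "github-archive",
        ["netneurolab", "hansen_receptors",
         "f8b41da92a733f99368c1f39d4959731aa1316c1"],
    )
    print(url)
    # https://github.com/netneurolab/hansen_receptors/archive/
    #     f8b41da92a733f99368c1f39d4959731aa1316c1.tar.gz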
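
The ``networks/randomize.py`` changes in patch 30 are purely reformatting, but since the whole module is touched, here is a short sketch of calling the strength-preserving null model it contains; the toy network, density threshold, and annealing parameters are arbitrary assumptions chosen to keep the run fast:

.. code-block:: python

    import numpy as np

    from netneurotools.networks.randomize import strength_preserving_rand_sa

    # Toy weighted, undirected network: symmetric, no self-loops, ~20% density.
    rng = np.random.default_rng(1234)
    W = rng.random((60, 60))
    W = np.triu(W, k=1)
    W[W < 0.8] = 0
    W = W + W.T

    # Degree-preserving rewiring followed by simulated annealing on the edge
    # weights, so the surrogate approximately preserves nodal strengths.
    B, energy = strength_preserving_rand_sa(W, nstage=10, niter=1000, seed=1234)

    print("final energy:", energy)
    print("max strength mismatch:",
          np.max(np.abs(W.sum(axis=1) - B.sum(axis=1))))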