Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce memory usage of KernelTransformer.transform and meta.utils.compute_kda_ma #676

Merged
merged 18 commits into from
May 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions nimare/meta/cbma/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ def _compute_weights(self, ma_values):
"""
return None

def _collect_ma_maps(self, coords_key="coordinates", maps_key="ma_maps"):
def _collect_ma_maps(self, coords_key="coordinates", maps_key="ma_maps", return_type="array"):
"""Collect modeled activation maps from Estimator inputs.

Parameters
Expand All @@ -215,10 +215,11 @@ def _collect_ma_maps(self, coords_key="coordinates", maps_key="ma_maps"):

else:
LGR.debug(f"Generating MA maps from coordinates ({coords_key}).")

ma_maps = self.kernel_transformer.transform(
self.inputs_[coords_key],
masker=self.masker,
return_type="array",
return_type=return_type,
)

return ma_maps
Expand Down
22 changes: 20 additions & 2 deletions nimare/meta/cbma/mkda.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
"""CBMA methods from the multilevel kernel density analysis (MKDA) family."""
import gc
import logging

import nibabel as nib
import numpy as np
import sparse
from joblib import Parallel, delayed
from scipy import ndimage
from scipy.stats import chi2
Expand Down Expand Up @@ -320,21 +322,37 @@ def _fit(self, dataset1, dataset2):
ma_maps1 = self._collect_ma_maps(
maps_key="ma_maps1",
coords_key="coordinates1",
return_type="sparse",
)
n_selected = ma_maps1.shape[0]
n_selected_active_voxels = np.sum(ma_maps1.astype(bool), axis=0)
n_selected_active_voxels = ma_maps1.sum(axis=0)

if isinstance(n_selected_active_voxels, sparse._coo.core.COO):
masker = dataset1.masker if not self.masker else self.masker
mask = masker.mask_img
mask_data = mask.get_fdata().astype(bool)

# Indexing the sparse array is slow, perform masking in the dense array
n_selected_active_voxels = n_selected_active_voxels.todense().reshape(-1)
n_selected_active_voxels = n_selected_active_voxels[mask_data.reshape(-1)]

del ma_maps1
gc.collect()

# Generate MA maps and calculate count variables for second dataset
ma_maps2 = self._collect_ma_maps(
maps_key="ma_maps2",
coords_key="coordinates2",
return_type="sparse",
)
n_unselected = ma_maps2.shape[0]
n_unselected_active_voxels = np.sum(ma_maps2.astype(bool), axis=0)
n_unselected_active_voxels = ma_maps2.sum(axis=0)
if isinstance(n_unselected_active_voxels, sparse._coo.core.COO):
n_unselected_active_voxels = n_unselected_active_voxels.todense().reshape(-1)
n_unselected_active_voxels = n_unselected_active_voxels[mask_data.reshape(-1)]

del ma_maps2
gc.collect()

n_mappables = n_selected + n_unselected

Expand Down
32 changes: 16 additions & 16 deletions nimare/meta/kernel.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import nibabel as nib
import numpy as np
import pandas as pd
import sparse
from nilearn import image

from nimare import references
Expand Down Expand Up @@ -89,7 +90,7 @@ def transform(self, dataset, masker=None, return_type="image"):
Mask to apply to MA maps. Required if ``dataset`` is a DataFrame.
If None (and ``dataset`` is a Dataset), the Dataset's masker attribute will be used.
Default is None.
return_type : {'array', 'image', 'dataset'}, optional
return_type : {'sparse', 'array', 'image', 'dataset'}, optional
Whether to return a numpy array ('array'), a list of niimgs ('image'),
or a Dataset with MA images saved as files ('dataset').
Default is 'image'.
Expand All @@ -98,6 +99,9 @@ def transform(self, dataset, masker=None, return_type="image"):
-------
imgs : (C x V) :class:`numpy.ndarray` or :obj:`list` of :class:`nibabel.Nifti1Image` \
or :class:`~nimare.dataset.Dataset`
If return_type is 'sparse', a 4D sparse array (E x S), where E is
the number of unique experiments, and the remaining 3 dimensions are
equal to `shape` of the images.
If return_type is 'array', a 2D numpy array (C x V), where C is
contrast and V is voxel.
If return_type is 'image', a list of modeled activation images
Expand All @@ -113,7 +117,7 @@ def transform(self, dataset, masker=None, return_type="image"):
image_type : str
Name of the corresponding column in the Dataset.images DataFrame.
"""
if return_type not in ("array", "image", "dataset"):
if return_type not in ("sparse", "array", "image", "dataset"):
raise ValueError('Argument "return_type" must be "image", "array", or "dataset".')

if isinstance(dataset, pd.DataFrame):
Expand Down Expand Up @@ -195,19 +199,16 @@ def transform(self, dataset, masker=None, return_type="image"):

transformed_maps = self._transform(mask, coordinates)

# This will be a numpy.ndarray if the kernel is an (M)KDAKernel.
if not isinstance(transformed_maps[0], (list, tuple)):
if return_type == "array":
ma_arr = transformed_maps[0][:, mask_data]

return ma_arr
else:
# Transform into an length-N list of length-2 tuples,
# composed of a 3D array and a string with the ID.
transformed_maps = list(zip(*transformed_maps))
if return_type == "sparse":
return transformed_maps[0]

imgs = []
for (kernel_data, id_) in transformed_maps:
# Loop over exp ids since sparse._coo.core.COO is not iterable
for i_exp, id_ in enumerate(transformed_maps[1]):
if isinstance(transformed_maps[0][i_exp], sparse._coo.core.COO):
kernel_data = transformed_maps[0][i_exp].todense()
else:
kernel_data = transformed_maps[0][i_exp]

if return_type == "array":
img = kernel_data[mask_data]
Expand Down Expand Up @@ -311,7 +312,6 @@ def _transform(self, mask, coordinates):
kernels = {} # retain kernels in dictionary to speed things up
exp_ids = coordinates["id"].unique()

# Use a list of tuples
transformed = []
for i_exp, id_ in enumerate(exp_ids):
data = coordinates.loc[coordinates["id"] == id_]
Expand Down Expand Up @@ -341,9 +341,9 @@ def _transform(self, mask, coordinates):

kernel_data = compute_ale_ma(mask.shape, ijk, kern)

transformed.append((kernel_data, id_))
transformed.append(kernel_data)

return transformed
return transformed, exp_ids


class KDAKernel(KernelTransformer):
Expand Down
36 changes: 25 additions & 11 deletions nimare/meta/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import nibabel as nib
import numpy as np
import numpy.linalg as npl
import sparse
from scipy import ndimage

from nimare import references
Expand Down Expand Up @@ -288,6 +289,7 @@ def compute_kda_ma(
.. versionchanged:: 0.0.12

* Remove low-memory option in favor of sparse arrays.
* Return 4D sparse array.

.. versionadded:: 0.0.4

Expand Down Expand Up @@ -316,39 +318,51 @@ def compute_kda_ma(

Returns
-------
kernel_data : :obj:`numpy.array`
3d or 4d array. If `exp_idx` is none, a 3d array in the same shape as
the `shape` argument is returned. If `exp_idx` is passed, a 4d array
kernel_data : :obj:`sparse._coo.core.COO`
4D sparse array. If `exp_idx` is none, a 3d array in the same
shape as the `shape` argument is returned. If `exp_idx` is passed, a 4d array
is returned, where the first dimension has size equal to the number of
unique experiments, and the remaining 3 dimensions are equal to `shape`.
"""
squeeze = exp_idx is None
if exp_idx is None:
exp_idx = np.ones(len(ijks))

uniq, exp_idx = np.unique(exp_idx, return_inverse=True)
n_studies = len(uniq)

kernel_shape = (n_studies,) + shape
kernel_data = np.zeros(kernel_shape, dtype=type(value))

n_dim = ijks.shape[1]
xx, yy, zz = [slice(-r // vox_dims[i], r // vox_dims[i] + 0.01, 1) for i in range(n_dim)]
cube = np.vstack([row.ravel() for row in np.mgrid[xx, yy, zz]])
kernel = cube[:, np.sum(np.dot(np.diag(vox_dims), cube) ** 2, 0) ** 0.5 <= r]

# Preallocate coords array, np.concatenate is too slow
n_coords = ijks.shape[0] * kernel.shape[1] # = n_peaks * n_voxels
coords = np.zeros((4, n_coords), dtype=int)
temp_idx = 0
for i, peak in enumerate(ijks):
sphere = np.round(kernel.T + peak)
idx = (np.min(sphere, 1) >= 0) & (np.max(np.subtract(sphere, shape), 1) <= -1)
sphere = sphere[idx, :].astype(int)
exp = exp_idx[i]
if sum_overlap:
kernel_data[exp][tuple(sphere.T)] += value
else:
kernel_data[exp][tuple(sphere.T)] = value

if squeeze:
kernel_data = np.squeeze(kernel_data, axis=0)
n_brain_voxels = sphere.shape[0]
chunk_idx = np.arange(temp_idx, temp_idx + n_brain_voxels, 1, dtype=int)

coords[0, chunk_idx] = np.full((1, n_brain_voxels), exp)
coords[1:, chunk_idx] = sphere.T

temp_idx += n_brain_voxels

# Usually coords.shape[1] < n_coords, since n_brain_voxels < n_voxels sometimes
coords = coords[:, :temp_idx]

if not sum_overlap:
coords = np.unique(coords, axis=1)

data = np.full(coords.shape[1], value)
kernel_data = sparse.COO(coords, data, shape=kernel_shape)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would we want to make it parameterizable if sparse or dense arrays are used?

cc: @tsalo


return kernel_data

Expand Down
3 changes: 3 additions & 0 deletions nimare/tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,14 @@ def _create_signal_mask(ground_truth_foci_ijks, mask):
sig_prob_map = compute_kda_ma(
dims, vox_dims, ground_truth_foci_ijks, r=2, value=1, sum_overlap=False
)
sig_prob_map = sig_prob_map[0].todense()

# area where I'm reasonably certain there are not significant results
nonsig_prob_map = compute_kda_ma(
dims, vox_dims, ground_truth_foci_ijks, r=14, value=1, sum_overlap=False
)
nonsig_prob_map = nonsig_prob_map[0].todense()

sig_map = nib.Nifti1Image((sig_prob_map == 1).astype(int), affine=mask.affine)
nonsig_map = nib.Nifti1Image((nonsig_prob_map == 0).astype(int), affine=mask.affine)
return sig_map, nonsig_map
Expand Down
2 changes: 2 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,14 @@ install_requires =
matplotlib<3.5 # this is for nilearn, which doesn't include it in its reqs
nibabel>=3.0.0
nilearn>=0.7.1
numba # used by sparse
numpy
pandas
pymare>=0.0.3rc2
requests
scikit-learn
scipy
sparse>=0.13.0 # for kernel transformers
statsmodels!=0.13.2 # this version doesn't install properly
tqdm
packages = find:
Expand Down