Skip to content

Commit

Permalink
refactor dispersion features
Browse files Browse the repository at this point in the history
  • Loading branch information
Henley13 committed Aug 5, 2020
1 parent ac7a271 commit eadcab1
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 161 deletions.
4 changes: 0 additions & 4 deletions bigfish/classification/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,7 @@
from .features import features_distance
from .features import features_in_out_nucleus
from .features import features_protrusion
from .features import features_polarization
from .features import features_dispersion
from .features import features_peripheral_dispersion
from .features import features_topography
from .features import features_foci
from .features import features_area
Expand All @@ -32,9 +30,7 @@
"features_distance"
"features_in_out_nucleus"
"features_protrusion"
"features_polarization"
"features_dispersion"
"features_peripheral_dispersion"
"features_topography"
"features_foci"
"features_area"
Expand Down
255 changes: 100 additions & 155 deletions bigfish/classification/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@

# ### Main functions ###

def compute_features(cell_mask, nuc_mask, ndim, rna_coord, voxel_size_yx=None,
centrosome_coord=None, compute_distance=True,
compute_intranuclear=True, compute_protrusion=True,
compute_dispersion=True, compute_topography=True,
compute_foci=True, compute_area=True,
compute_centrosome=True):
def compute_features(cell_mask, nuc_mask, ndim, rna_coord, smfish=None,
voxel_size_yx=None, centrosome_coord=None,
compute_distance=True, compute_intranuclear=True,
compute_protrusion=True, compute_dispersion=True,
compute_topography=True, compute_foci=True,
compute_area=True, compute_centrosome=True):
"""Compute requested features.
Parameters
Expand All @@ -41,6 +41,8 @@ def compute_features(cell_mask, nuc_mask, ndim, rna_coord, voxel_size_yx=None,
plus the index of the cluster assigned to the spot. If no cluster was
assigned, value is -1. If cluster id is not provided foci related
features are not computed.
smfish : np.ndarray, np.uint
Image of RNAs, with shape (y, x).
voxel_size_yx : int, float or None
Size of a voxel on the yx plan, in nanometer.
centrosome_coord : np.ndarray, np.int64
Expand Down Expand Up @@ -80,6 +82,10 @@ def compute_features(cell_mask, nuc_mask, ndim, rna_coord, voxel_size_yx=None,
compute_foci=bool,
compute_area=bool,
compute_centrosome=bool)
if smfish is not None:
stack.check_array(smfish, ndim=[2, 3], dtype=[np.uint8, np.uint16])
if smfish.ndim == 3:
smfish = stack.maximum_projection(smfish)

# prepare input data
(cell_mask,
Expand Down Expand Up @@ -114,15 +120,13 @@ def compute_features(cell_mask, nuc_mask, ndim, rna_coord, voxel_size_yx=None,
# False)

# dispersion indices
if compute_dispersion:
features += features_polarization(
centroid_rna, centroid_cell, centroid_nuc,
distance_centroid_cell, distance_centroid_nuc, ndim, False)
dispersion_index = features_dispersion(
rna_coord, distance_centroid_rna, cell_mask, ndim, False)
peripheral_dispersion_index = features_peripheral_dispersion(
rna_coord, distance_centroid_cell, cell_mask, ndim, False)
features += (dispersion_index, peripheral_dispersion_index)
if compute_dispersion and smfish is not None:
features += features_dispersion(
smfish, rna_coord, centroid_rna, cell_mask, centroid_cell,
centroid_nuc, ndim, False)
elif compute_dispersion and smfish is None:
raise ValueError("Dispersion features can't be computed because "
"'smfish' is not provided.")

# topographic features
if compute_topography and voxel_size_yx is not None:
Expand Down Expand Up @@ -153,7 +157,7 @@ def compute_features(cell_mask, nuc_mask, ndim, rna_coord, voxel_size_yx=None,
elif compute_centrosome and centrosome_coord is None:
raise ValueError("Centrosome related features can't be computed "
"because 'centrosome_coord' is not provided.")
elif (compute_centrosome and voxel_size_yx is None):
elif compute_centrosome and voxel_size_yx is None:
raise ValueError("Centrosome related features can't be computed "
"because 'voxel_size_yx' is not provided.")

Expand Down Expand Up @@ -226,14 +230,13 @@ def get_features_name(names_features_distance=True,
"nb_rna_out_nuc",
"nb_rna_in_nuc"]

#if names_features_protrusion:
# features_name += ["index_rna_opening_30",
# "proportion_rna_opening_30",
# "area_opening_30"]
# if names_features_protrusion:
# features_name += ["index_rna_opening_30",
# "proportion_rna_opening_30",
# "area_opening_30"]

if names_features_dispersion:
features_name += ["score_polarization_cell",
"score_polarization_nuc",
features_name += ["index_polarization",
"index_dispersion",
"index_peripheral_dispersion"]

Expand All @@ -253,12 +256,12 @@ def get_features_name(names_features_distance=True,
"proportion_rna_cell_radius_{}_{}".format(a, b)]
a = b

#if names_features_foci:
# features_name += ["proportion_rna_in_foci",
# "index_foci_mean_distance_cyt",
# "index_foci_median_distance_cyt",
# "index_foci_mean_distance_nuc",
# "index_foci_median_distance_nuc"]
# if names_features_foci:
# features_name += ["proportion_rna_in_foci",
# "index_foci_mean_distance_cyt",
# "index_foci_median_distance_cyt",
# "index_foci_mean_distance_nuc",
# "index_foci_median_distance_nuc"]

if names_features_area:
features_name += ["proportion_nuc_area",
Expand Down Expand Up @@ -477,98 +480,41 @@ def features_protrusion(rna_coord_out_nuc, cell_mask, nuc_mask,
return features


def features_polarization(centroid_rna, centroid_cell, centroid_nuc,
distance_centroid_cell, distance_centroid_nuc, ndim,
check_input=True):
"""Compute polarization related features.
def features_dispersion(smfish, rna_coord, centroid_rna, cell_mask,
centroid_cell, centroid_nuc, ndim, check_input=True):
"""Compute RNA Distribution Index features (RDI) described in
# TODO add reference to RDI paper
RDI Calculator: An analysis Tool to assess RNA distributions in cells,
Stueland M., Wang T., Park H. Y., Mili, S., 201
Parameters
----------
smfish : np.ndarray, np.uint
Image of RNAs, with shape (y, x).
rna_coord : np.ndarray, np.int64
Coordinates of the detected RNAs with zyx or yx coordinates in the
first 3 or 2 columns.
centroid_rna : np.ndarray, np.int64
Coordinates of the rna centroid with shape (1, 2).
cell_mask : np.ndarray, bool
Surface of the cell with shape (y, x).
centroid_cell : np.ndarray, np.int64
Coordinates of the cell centroid with shape (1, 2).
centroid_nuc : np.ndarray, np.int64
Coordinates of the nucleus centroid with shape (1, 2).
distance_centroid_cell : np.ndarray, np.float32
Distance map from the cell centroid with shape (y, x).
distance_centroid_nuc : np.ndarray, np.float32
Distance map from the nucleus centroid with shape (y, x).
ndim : int
Number of spatial dimensions to consider.
check_input : bool
Check input validity.
Returns
-------
index_polarization_cell : float
Polarization index around the cell centroid.
index_polarization_nuc : float
Polarization index around the nucleus centroid.
"""
# check parameters
stack.check_parameter(check_input=bool)
if check_input:
stack.check_parameter(ndim=int)
if ndim not in [2, 3]:
raise ValueError("'ndim' should be 2 or 3, not {0}.".format(ndim))
stack.check_array(centroid_rna, ndim=2, dtype=np.int64)
stack.check_array(centroid_cell, ndim=2, dtype=np.int64)
stack.check_array(centroid_nuc, ndim=2, dtype=np.int64)
stack.check_array(distance_centroid_cell, ndim=2,
dtype=[np.float16, np.float32, np.float64])
stack.check_array(distance_centroid_nuc, ndim=2,
dtype=[np.float16, np.float32, np.float64])

# initialization
if ndim == 3:
centroid_rna_2d = centroid_rna[1:]
else:
centroid_rna_2d = centroid_rna.copy()

# compute polarization index from cell centroid
centroid_distance = np.linalg.norm(centroid_rna_2d - centroid_cell)
maximum_distance = distance_centroid_cell.max()
index_polarization_cell = centroid_distance / maximum_distance

# compute polarization index from nucleus centroid
centroid_distance = np.linalg.norm(centroid_rna_2d - centroid_nuc)
maximum_distance = distance_centroid_nuc.max()
index_polarization_nuc = centroid_distance / maximum_distance

# gather features
features = (index_polarization_cell, index_polarization_nuc)

return features


def features_dispersion(rna_coord, distance_centroid_rna, cell_mask, ndim,
check_input=True):
"""Compute a dispersion index.
# TODO add reference to RDI paper
Parameters
----------
rna_coord : np.ndarray, np.int64
Coordinates of the detected RNAs with zyx or yx coordinates in the
first 3 or 2 columns.
distance_centroid_rna : np.ndarray, np.float32
Distance map from the rna centroid with shape (y, x).
cell_mask : np.ndarray, bool
Surface of the cell with shape (y, x).
ndim : int
Number of spatial dimensions to consider.
check_input : bool
Check input validity.
Returns
-------
index_polarization : float
Polarization index (PI).
index_dispersion : float
Dispersion index.
Dispersion index (DI).
index_peripheral_dispersion : float
Peripheral dispersion index (PDI).
"""
# check parameters
Expand All @@ -577,84 +523,83 @@ def features_dispersion(rna_coord, distance_centroid_rna, cell_mask, ndim,
stack.check_parameter(ndim=int)
if ndim not in [2, 3]:
raise ValueError("'ndim' should be 2 or 3, not {0}.".format(ndim))
stack.check_array(smfish, ndim=2, dtype=[np.uint8, np.uint16])
stack.check_array(rna_coord, ndim=2, dtype=np.int64)
stack.check_array(distance_centroid_rna, ndim=2,
dtype=[np.float16, np.float32, np.float64])
stack.check_array(centroid_rna, ndim=1, dtype=np.int64)
stack.check_array(cell_mask, ndim=2, dtype=bool)
stack.check_array(centroid_cell, ndim=1, dtype=np.int64)
stack.check_array(centroid_nuc, ndim=1, dtype=np.int64)

# case where no mRNAs are detected
if len(rna_coord) == 0:
features = 1.
features = (0., 1., 1.)
return features

# initialization
if ndim == 3:
centroid_rna_2d = centroid_rna[1:]
else:
centroid_rna_2d = centroid_rna.copy()

# get coordinates of each pixel of the cell
cell_coord = np.nonzero(cell_mask)
cell_coord = np.column_stack(cell_coord)

# get intensity value of every pixels and its random counterpart
cell_value = smfish[cell_mask]
total_intensity = cell_value.sum()
cell_value_random = np.ones_like(cell_value)
total_intensity_random = cell_value_random.sum()

# compute polarization index from cell centroid
centroid_distance = np.linalg.norm(centroid_rna_2d - centroid_cell)
gyration_radius = _rmsd(cell_coord, centroid_cell)
index_polarization = centroid_distance / gyration_radius

features = (index_polarization,)

# compute dispersion index
a = distance_centroid_rna[rna_coord[:, ndim - 2],
rna_coord[:, ndim - 1]]
b = distance_centroid_rna[cell_coord[:, 0],
cell_coord[:, 1]]
index_dispersion = a.mean() / b.mean()
r = np.linalg.norm(cell_coord - centroid_rna_2d, axis=1) ** 2
a = np.sum((r * cell_value) / total_intensity)
b = np.sum((r * cell_value_random) / total_intensity_random)
index_dispersion = a / b

features += (index_dispersion,)

# compute peripheral dispersion index
r = np.linalg.norm(cell_coord - centroid_nuc, axis=1) ** 2
a = np.sum((r * cell_value) / total_intensity)
b = np.sum((r * cell_value_random) / total_intensity_random)
index_peripheral_dispersion = a / b

return index_dispersion
features += (index_peripheral_dispersion,)

return features

def features_peripheral_dispersion(rna_coord, distance_centroid_cell,
cell_mask, ndim, check_input=True):
"""Compute a peripheral dispersion index.

# TODO add reference to RDI paper
def _rmsd(coord, reference_coord):
"""Compute the root-mean-squared distance between coordinates and a
reference coordinate.
Parameters
----------
rna_coord : np.ndarray, np.int64
Coordinates of the detected RNAs with zyx or yx coordinates in the
first 3 or 2 columns.
distance_centroid_cell : np.ndarray, np.float32
Distance map from the cell centroid with shape (y, x).
cell_mask : np.ndarray, bool
Surface of the cell with shape (y, x).
ndim : int
Number of spatial dimensions to consider.
check_input : bool
Check input validity.
coord : np.ndarray, np.int64
Coordinates with shape (nb_points, 2).
reference_coord : np.ndarray, np.int64
Reference coordinate to compute the distance from, with shape (1, 2).
Returns
-------
index_peripheral_dispersion : float
Peripheral dispersion index.
rmsd : float
Root-mean-squared distance.
"""
# check parameters
stack.check_parameter(check_input=bool)
if check_input:
stack.check_parameter(ndim=int)
if ndim not in [2, 3]:
raise ValueError("'ndim' should be 2 or 3, not {0}.".format(ndim))
stack.check_array(rna_coord, ndim=2, dtype=np.int64)
stack.check_array(distance_centroid_cell, ndim=2,
dtype=[np.float16, np.float32, np.float64])
stack.check_array(cell_mask, ndim=2, dtype=bool)

# case where no mRNAs are detected
if len(rna_coord) == 0:
features = 1.
return features

# get coordinates of each pixel of the cell
cell_coord = np.nonzero(cell_mask)
cell_coord = np.column_stack(cell_coord)

# compute peripheral dispersion index
a = distance_centroid_cell[rna_coord[:, ndim - 2],
rna_coord[:, ndim - 1]]
b = distance_centroid_cell[cell_coord[:, 0],
cell_coord[:, 1]]
index_peripheral_dispersion = a.mean() / b.mean()
# compute RMSD between 'coord' and 'reference_coord'
n = len(coord)
diff = coord - reference_coord
rmsd = float(np.sqrt((diff ** 2).sum() / n))

return index_peripheral_dispersion
return rmsd


def features_topography(rna_coord, cell_mask, nuc_mask, cell_mask_out_nuc,
Expand Down
Loading

0 comments on commit eadcab1

Please sign in to comment.