From 1d5fc0fc4b091cc63755f6fa3e76db7904acfe87 Mon Sep 17 00:00:00 2001 From: ArneD Date: Wed, 8 Jan 2025 07:58:00 +0100 Subject: [PATCH] Sc 41 optimize flowsom clustering (#81) * SC_41_z_score * SC_41 plot clusters * SC_41 flowsom * SC_41 flowsom clusering visualization * SC_41 test flowsom * SC_41 docs * SC_41 update docstring * SC_41 docs --- docs/api.md | 3 + ...lowSOM_for_pixel_and_cell_clustering.ipynb | 1409 ++--------------- .../_tests/test_plot/test_plot_flowsom.py | 68 + .../image/pixel_clustering/_preprocess.py | 59 +- src/harpy/io/_visium_hd.py | 2 - src/harpy/plot/__init__.py | 1 + src/harpy/plot/_flowsom.py | 277 ++++ src/harpy/plot/_plot.py | 8 +- .../pixel_clustering/_cluster_intensity.py | 8 + 9 files changed, 518 insertions(+), 1317 deletions(-) create mode 100644 src/harpy/_tests/test_plot/test_plot_flowsom.py create mode 100644 src/harpy/plot/_flowsom.py diff --git a/docs/api.md b/docs/api.md index 74f6bf8..662074c 100644 --- a/docs/api.md +++ b/docs/api.md @@ -167,6 +167,9 @@ Plotting functions. .. autosummary:: :toctree: generated + pl.pixel_clusters + pl.pixel_clusters_heatmap + pl.snr_ratio pl.group_snr_ratio pl.snr_clustermap diff --git a/docs/tutorials/general/FlowSOM_for_pixel_and_cell_clustering.ipynb b/docs/tutorials/general/FlowSOM_for_pixel_and_cell_clustering.ipynb index 0b8a95a..bf87146 100644 --- a/docs/tutorials/general/FlowSOM_for_pixel_and_cell_clustering.ipynb +++ b/docs/tutorials/general/FlowSOM_for_pixel_and_cell_clustering.ipynb @@ -9,26 +9,15 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/arnedf/miniconda3/envs/harpy/lib/python3.10/site-packages/dask/dataframe/__init__.py:31: FutureWarning: The legacy Dask DataFrame implementation is deprecated and will be removed in a future version. Set the configuration option `dataframe.query-planning` to `True` or None to enable the new Dask Dataframe implementation and silence this warning.\n", - " warnings.warn(\n" - ] - } - ], + "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2\n", "\n", "import harpy as hp\n", "from harpy.datasets import pixie_example\n", - "from harpy.table.cell_clustering._utils import _export_to_ark_format as _export_to_ark_format_cells\n", - "from harpy.table.pixel_clustering._cluster_intensity import _export_to_ark_format as _export_to_ark_format_pixels\n", "from harpy.utils._keys import ClusteringKey" ] }, @@ -41,72 +30,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/arnedf/miniconda3/envs/harpy/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n", - "2024-12-11 11:14:28,429 - harpy.image._manager - WARNING - No dims parameter specified. Assuming order of dimension of provided array is ((c), (z), y, x)\n", - "2024-12-11 11:14:28,551 - harpy.image._manager - INFO - Writing results to layer 'raw_image_fov0'\n", - "2024-12-11 11:14:28,555 - harpy.image._manager - WARNING - No dims parameter specified. Assuming order of dimension of provided array is ((c), (z), y, x)\n", - "2024-12-11 11:14:28,560 - harpy.image._manager - INFO - Writing results to layer 'label_nuclear_fov0'\n", - "2024-12-11 11:14:28,563 - harpy.image._manager - WARNING - No dims parameter specified. Assuming order of dimension of provided array is ((c), (z), y, x)\n", - "2024-12-11 11:14:28,567 - harpy.image._manager - INFO - Writing results to layer 'label_whole_fov0'\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/Users/arnedf/.cache/huggingface/datasets/downloads/extracted/ed276a09a07145a5c25cd3c0a3fd99368fc2f3387300f55927c0b600c043de39/post_clustering\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2024-12-11 11:14:28,623 - harpy.image._manager - WARNING - No dims parameter specified. Assuming order of dimension of provided array is ((c), (z), y, x)\n", - "2024-12-11 11:14:28,717 - harpy.image._manager - INFO - Writing results to layer 'raw_image_fov1'\n", - "2024-12-11 11:14:28,721 - harpy.image._manager - WARNING - No dims parameter specified. Assuming order of dimension of provided array is ((c), (z), y, x)\n", - "2024-12-11 11:14:28,727 - harpy.image._manager - INFO - Writing results to layer 'label_nuclear_fov1'\n", - "2024-12-11 11:14:28,730 - harpy.image._manager - WARNING - No dims parameter specified. Assuming order of dimension of provided array is ((c), (z), y, x)\n", - "2024-12-11 11:14:28,735 - harpy.image._manager - INFO - Writing results to layer 'label_whole_fov1'\n", - "/Users/arnedf/miniconda3/envs/harpy/lib/python3.10/site-packages/anndata/_core/aligned_df.py:67: ImplicitModificationWarning: Transforming to str index.\n", - " warnings.warn(\"Transforming to str index.\", ImplicitModificationWarning)\n", - "/Users/arnedf/miniconda3/envs/harpy/lib/python3.10/site-packages/spatialdata/models/models.py:1046: ImplicitModificationWarning: Trying to modify attribute `._uns` of view, initializing view as actual.\n", - " adata.uns[cls.ATTRS_KEY] = attr\n" - ] - }, - { - "data": { - "text/plain": [ - "SpatialData object\n", - "├── Images\n", - "│ ├── 'raw_image_fov0': DataArray[cyx] (22, 512, 512)\n", - "│ └── 'raw_image_fov1': DataArray[cyx] (22, 1024, 1024)\n", - "├── Labels\n", - "│ ├── 'label_nuclear_fov0': DataArray[yx] (512, 512)\n", - "│ ├── 'label_nuclear_fov1': DataArray[yx] (1024, 1024)\n", - "│ ├── 'label_whole_fov0': DataArray[yx] (512, 512)\n", - "│ └── 'label_whole_fov1': DataArray[yx] (1024, 1024)\n", - "└── Tables\n", - " └── 'table': AnnData (1414, 22)\n", - "with coordinate systems:\n", - " ▸ 'fov0', with elements:\n", - " raw_image_fov0 (Images), label_nuclear_fov0 (Labels), label_whole_fov0 (Labels)\n", - " ▸ 'fov1', with elements:\n", - " raw_image_fov1 (Images), label_nuclear_fov1 (Labels), label_whole_fov1 (Labels)" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "sdata_ark_analysis = pixie_example([\"fov0\", \"fov1\"])\n", "sdata_ark_analysis" @@ -114,7 +40,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -138,49 +64,18 @@ "]" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Preprocess" + ] + }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2024-12-11 11:14:29,159 - harpy.image._manager - WARNING - No dims parameter specified. Assuming order of dimension of provided array is ((c), (z), y, x)\n", - "2024-12-11 11:14:29,929 - harpy.image._manager - INFO - Writing results to layer 'raw_image_fov0_processed'\n", - "2024-12-11 11:14:29,931 - harpy.image._manager - WARNING - No dims parameter specified. Assuming order of dimension of provided array is ((c), (z), y, x)\n", - "2024-12-11 11:14:30,716 - harpy.image._manager - INFO - Writing results to layer 'raw_image_fov1_processed'\n" - ] - }, - { - "data": { - "text/plain": [ - "SpatialData object\n", - "├── Images\n", - "│ ├── 'raw_image_fov0': DataArray[cyx] (22, 512, 512)\n", - "│ ├── 'raw_image_fov0_processed': DataArray[cyx] (16, 512, 512)\n", - "│ ├── 'raw_image_fov1': DataArray[cyx] (22, 1024, 1024)\n", - "│ └── 'raw_image_fov1_processed': DataArray[cyx] (16, 1024, 1024)\n", - "├── Labels\n", - "│ ├── 'label_nuclear_fov0': DataArray[yx] (512, 512)\n", - "│ ├── 'label_nuclear_fov1': DataArray[yx] (1024, 1024)\n", - "│ ├── 'label_whole_fov0': DataArray[yx] (512, 512)\n", - "│ └── 'label_whole_fov1': DataArray[yx] (1024, 1024)\n", - "└── Tables\n", - " └── 'table': AnnData (1414, 22)\n", - "with coordinate systems:\n", - " ▸ 'fov0', with elements:\n", - " raw_image_fov0 (Images), raw_image_fov0_processed (Images), label_nuclear_fov0 (Labels), label_whole_fov0 (Labels)\n", - " ▸ 'fov1', with elements:\n", - " raw_image_fov1 (Images), raw_image_fov1_processed (Images), label_nuclear_fov1 (Labels), label_whole_fov1 (Labels)" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "sdata_ark_analysis = hp.im.pixel_clustering_preprocess(\n", " sdata_ark_analysis,\n", @@ -188,66 +83,25 @@ " output_layer=[\"raw_image_fov0_processed\", \"raw_image_fov1_processed\"],\n", " channels=channels,\n", " chunks=2048,\n", + " persist_intermediate=True, # set to False if you have multiple images, and if they are large.\n", " overwrite=True,\n", " sigma=2.0,\n", ")\n", "sdata_ark_analysis" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Pixel clustering" + ] + }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m2024-12-11 11:14:30.894\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mflowsom.main\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m84\u001b[0m - \u001b[34m\u001b[1mReading input.\u001b[0m\n", - "\u001b[32m2024-12-11 11:14:30.895\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mflowsom.main\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m86\u001b[0m - \u001b[34m\u001b[1mFitting model: clustering and metaclustering.\u001b[0m\n", - "\u001b[32m2024-12-11 11:14:32.906\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mflowsom.main\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m88\u001b[0m - \u001b[34m\u001b[1mUpdating derived values.\u001b[0m\n", - "2024-12-11 11:14:33,626 - harpy.image._manager - WARNING - No dims parameter specified. Assuming order of dimension of provided array is ((c), (z), y, x)\n", - "2024-12-11 11:14:33,629 - harpy.image._manager - INFO - Writing results to layer 'raw_image_fov0_flowsom_clusters'\n", - "2024-12-11 11:14:33,630 - harpy.image._manager - WARNING - No dims parameter specified. Assuming order of dimension of provided array is ((c), (z), y, x)\n", - "2024-12-11 11:14:33,633 - harpy.image._manager - INFO - Writing results to layer 'raw_image_fov0_flowsom_metaclusters'\n", - "2024-12-11 11:14:33,951 - harpy.image._manager - WARNING - No dims parameter specified. Assuming order of dimension of provided array is ((c), (z), y, x)\n", - "2024-12-11 11:14:33,957 - harpy.image._manager - INFO - Writing results to layer 'raw_image_fov1_flowsom_clusters'\n", - "2024-12-11 11:14:33,958 - harpy.image._manager - WARNING - No dims parameter specified. Assuming order of dimension of provided array is ((c), (z), y, x)\n", - "2024-12-11 11:14:33,961 - harpy.image._manager - INFO - Writing results to layer 'raw_image_fov1_flowsom_metaclusters'\n" - ] - }, - { - "data": { - "text/plain": [ - "SpatialData object\n", - "├── Images\n", - "│ ├── 'raw_image_fov0': DataArray[cyx] (22, 512, 512)\n", - "│ ├── 'raw_image_fov0_processed': DataArray[cyx] (16, 512, 512)\n", - "│ ├── 'raw_image_fov1': DataArray[cyx] (22, 1024, 1024)\n", - "│ └── 'raw_image_fov1_processed': DataArray[cyx] (16, 1024, 1024)\n", - "├── Labels\n", - "│ ├── 'label_nuclear_fov0': DataArray[yx] (512, 512)\n", - "│ ├── 'label_nuclear_fov1': DataArray[yx] (1024, 1024)\n", - "│ ├── 'label_whole_fov0': DataArray[yx] (512, 512)\n", - "│ ├── 'label_whole_fov1': DataArray[yx] (1024, 1024)\n", - "│ ├── 'raw_image_fov0_flowsom_clusters': DataArray[yx] (512, 512)\n", - "│ ├── 'raw_image_fov0_flowsom_metaclusters': DataArray[yx] (512, 512)\n", - "│ ├── 'raw_image_fov1_flowsom_clusters': DataArray[yx] (1024, 1024)\n", - "│ └── 'raw_image_fov1_flowsom_metaclusters': DataArray[yx] (1024, 1024)\n", - "└── Tables\n", - " └── 'table': AnnData (1414, 22)\n", - "with coordinate systems:\n", - " ▸ 'fov0', with elements:\n", - " raw_image_fov0 (Images), raw_image_fov0_processed (Images), label_nuclear_fov0 (Labels), label_whole_fov0 (Labels), raw_image_fov0_flowsom_clusters (Labels), raw_image_fov0_flowsom_metaclusters (Labels)\n", - " ▸ 'fov1', with elements:\n", - " raw_image_fov1 (Images), raw_image_fov1_processed (Images), label_nuclear_fov1 (Labels), label_whole_fov1 (Labels), raw_image_fov1_flowsom_clusters (Labels), raw_image_fov1_flowsom_metaclusters (Labels)" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import flowsom as fs\n", "from dask.distributed import Client, LocalCluster\n", @@ -286,6 +140,8 @@ " num_batches = 10,\n", " xdim=10,\n", " ydim=10,\n", + " z_score=True,\n", + " z_cap=3,\n", " persist_intermediate=True,\n", " overwrite=True,\n", ")\n", @@ -294,62 +150,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/arnedf/VIB/harpy/src/harpy/table/_allocation_intensity.py:217: ImplicitModificationWarning: Setting element `.obsm['spatial']` of view, initializing view as actual.\n", - " adata.obsm[_SPATIAL] = coordinates\n", - "/Users/arnedf/VIB/harpy/src/harpy/table/_allocation_intensity.py:217: ImplicitModificationWarning: Setting element `.obsm['spatial']` of view, initializing view as actual.\n", - " adata.obsm[_SPATIAL] = coordinates\n", - "/Users/arnedf/miniconda3/envs/harpy/lib/python3.10/site-packages/spatialdata/_core/_elements.py:116: UserWarning: Key `counts_clusters` already exists. Overwriting it in-memory.\n", - " self._check_key(key, self.keys(), self._shared_keys)\n", - "2024-12-11 11:14:35,924 - harpy.table._preprocess - INFO - Calculating cell size from provided labels_layer 'raw_image_fov0_flowsom_clusters'\n", - "2024-12-11 11:14:35,941 - harpy.table._preprocess - INFO - Calculating cell size from provided labels_layer 'raw_image_fov1_flowsom_clusters'\n", - "/Users/arnedf/miniconda3/envs/harpy/lib/python3.10/site-packages/spatialdata/models/models.py:1048: UserWarning: Converting `region_key: fov_labels` to categorical dtype.\n", - " return convert_region_column_to_categorical(adata)\n", - "/Users/arnedf/miniconda3/envs/harpy/lib/python3.10/site-packages/spatialdata/_core/_elements.py:116: UserWarning: Key `counts_clusters` already exists. Overwriting it in-memory.\n", - " self._check_key(key, self.keys(), self._shared_keys)\n", - "/Users/arnedf/miniconda3/envs/harpy/lib/python3.10/site-packages/spatialdata/_core/_elements.py:116: UserWarning: Key `counts_clusters` already exists. Overwriting it in-memory.\n", - " self._check_key(key, self.keys(), self._shared_keys)\n" - ] - }, - { - "data": { - "text/plain": [ - "SpatialData object\n", - "├── Images\n", - "│ ├── 'raw_image_fov0': DataArray[cyx] (22, 512, 512)\n", - "│ ├── 'raw_image_fov0_processed': DataArray[cyx] (16, 512, 512)\n", - "│ ├── 'raw_image_fov1': DataArray[cyx] (22, 1024, 1024)\n", - "│ └── 'raw_image_fov1_processed': DataArray[cyx] (16, 1024, 1024)\n", - "├── Labels\n", - "│ ├── 'label_nuclear_fov0': DataArray[yx] (512, 512)\n", - "│ ├── 'label_nuclear_fov1': DataArray[yx] (1024, 1024)\n", - "│ ├── 'label_whole_fov0': DataArray[yx] (512, 512)\n", - "│ ├── 'label_whole_fov1': DataArray[yx] (1024, 1024)\n", - "│ ├── 'raw_image_fov0_flowsom_clusters': DataArray[yx] (512, 512)\n", - "│ ├── 'raw_image_fov0_flowsom_metaclusters': DataArray[yx] (512, 512)\n", - "│ ├── 'raw_image_fov1_flowsom_clusters': DataArray[yx] (1024, 1024)\n", - "│ └── 'raw_image_fov1_flowsom_metaclusters': DataArray[yx] (1024, 1024)\n", - "└── Tables\n", - " ├── 'counts_clusters': AnnData (100, 16)\n", - " └── 'table': AnnData (1414, 22)\n", - "with coordinate systems:\n", - " ▸ 'fov0', with elements:\n", - " raw_image_fov0 (Images), raw_image_fov0_processed (Images), label_nuclear_fov0 (Labels), label_whole_fov0 (Labels), raw_image_fov0_flowsom_clusters (Labels), raw_image_fov0_flowsom_metaclusters (Labels)\n", - " ▸ 'fov1', with elements:\n", - " raw_image_fov1 (Images), raw_image_fov1_processed (Images), label_nuclear_fov1 (Labels), label_whole_fov1 (Labels), raw_image_fov1_flowsom_clusters (Labels), raw_image_fov1_flowsom_metaclusters (Labels)" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "sdata_ark_analysis = hp.tb.cluster_intensity(\n", " sdata_ark_analysis,\n", @@ -363,64 +166,65 @@ "sdata_ark_analysis" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Visualization of pixel clusters and metaclusters." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "hp.pl.pixel_clusters(\n", + " sdata_ark_analysis,\n", + " labels_layer=\"raw_image_fov0_flowsom_metaclusters\",\n", + " figsize=(5, 5),\n", + " to_coordinate_system=\"fov0\",\n", + " render_labels_kwargs={ \"alpha\":1 }\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Heatmap of channel intensity per cluster and metacluster" + ] + }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2024-12-11 11:14:36,153 - harpy.table._preprocess - INFO - Calculating cell size from provided labels_layer 'label_whole_fov0'\n", - "2024-12-11 11:14:36,170 - harpy.table._preprocess - INFO - Calculating cell size from provided labels_layer 'label_whole_fov1'\n", - "/Users/arnedf/miniconda3/envs/harpy/lib/python3.10/site-packages/spatialdata/models/models.py:1048: UserWarning: Converting `region_key: fov_labels` to categorical dtype.\n", - " return convert_region_column_to_categorical(adata)\n", - "/Users/arnedf/miniconda3/envs/harpy/lib/python3.10/site-packages/spatialdata/_core/_elements.py:116: UserWarning: Key `table_cell_clustering_flowsom` already exists. Overwriting it in-memory.\n", - " self._check_key(key, self.keys(), self._shared_keys)\n", - "\u001b[32m2024-12-11 11:14:36.219\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mflowsom.main\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m84\u001b[0m - \u001b[34m\u001b[1mReading input.\u001b[0m\n", - "\u001b[32m2024-12-11 11:14:36.220\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mflowsom.main\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m86\u001b[0m - \u001b[34m\u001b[1mFitting model: clustering and metaclustering.\u001b[0m\n", - "\u001b[32m2024-12-11 11:14:36.227\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mflowsom.main\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m88\u001b[0m - \u001b[34m\u001b[1mUpdating derived values.\u001b[0m\n", - "2024-12-11 11:14:36,347 - harpy.table.cell_clustering._clustering - INFO - Adding mean cluster intensity to '.uns['clustering']'\n", - "2024-12-11 11:14:36,362 - harpy.table.cell_clustering._clustering - INFO - Adding mean cluster intensity to '.uns['metaclustering']'\n", - "/Users/arnedf/miniconda3/envs/harpy/lib/python3.10/site-packages/spatialdata/_core/_elements.py:116: UserWarning: Key `table_cell_clustering_flowsom` already exists. Overwriting it in-memory.\n", - " self._check_key(key, self.keys(), self._shared_keys)\n" - ] - }, - { - "data": { - "text/plain": [ - "SpatialData object\n", - "├── Images\n", - "│ ├── 'raw_image_fov0': DataArray[cyx] (22, 512, 512)\n", - "│ ├── 'raw_image_fov0_processed': DataArray[cyx] (16, 512, 512)\n", - "│ ├── 'raw_image_fov1': DataArray[cyx] (22, 1024, 1024)\n", - "│ └── 'raw_image_fov1_processed': DataArray[cyx] (16, 1024, 1024)\n", - "├── Labels\n", - "│ ├── 'label_nuclear_fov0': DataArray[yx] (512, 512)\n", - "│ ├── 'label_nuclear_fov1': DataArray[yx] (1024, 1024)\n", - "│ ├── 'label_whole_fov0': DataArray[yx] (512, 512)\n", - "│ ├── 'label_whole_fov1': DataArray[yx] (1024, 1024)\n", - "│ ├── 'raw_image_fov0_flowsom_clusters': DataArray[yx] (512, 512)\n", - "│ ├── 'raw_image_fov0_flowsom_metaclusters': DataArray[yx] (512, 512)\n", - "│ ├── 'raw_image_fov1_flowsom_clusters': DataArray[yx] (1024, 1024)\n", - "│ └── 'raw_image_fov1_flowsom_metaclusters': DataArray[yx] (1024, 1024)\n", - "└── Tables\n", - " ├── 'counts_clusters': AnnData (100, 16)\n", - " ├── 'table': AnnData (1414, 22)\n", - " └── 'table_cell_clustering_flowsom': AnnData (1409, 20)\n", - "with coordinate systems:\n", - " ▸ 'fov0', with elements:\n", - " raw_image_fov0 (Images), raw_image_fov0_processed (Images), label_nuclear_fov0 (Labels), label_whole_fov0 (Labels), raw_image_fov0_flowsom_clusters (Labels), raw_image_fov0_flowsom_metaclusters (Labels)\n", - " ▸ 'fov1', with elements:\n", - " raw_image_fov1 (Images), raw_image_fov1_processed (Images), label_nuclear_fov1 (Labels), label_whole_fov1 (Labels), raw_image_fov1_flowsom_clusters (Labels), raw_image_fov1_flowsom_metaclusters (Labels)" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], + "source": [ + "for _metaclusters in [True, False]:\n", + " hp.pl.pixel_clusters_heatmap(\n", + " sdata_ark_analysis,\n", + " table_layer=\"counts_clusters\",\n", + " figsize=(8, 8),\n", + " fig_kwargs={\"dpi\": 100},\n", + " linewidths=0.001,\n", + " metaclusters=_metaclusters,\n", + " z_score=True,\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Cell clustering" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "batch_model=fs.models.BatchFlowSOMEstimator\n", "\n", @@ -441,56 +245,18 @@ "sdata_ark_analysis" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Optional export to a .csv format that can be used for visualization using the ark gui" + ] + }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2024-12-11 11:14:36,436 - harpy.table.cell_clustering._weighted_channel_expression - INFO - Adding mean over obtained cell clusters '(clustering)' of the average marker expression for each cell weighted by pixel cluster count to '.uns[ 'clustering_channels' ]' of table layer 'table_cell_clustering_flowsom'\n", - "2024-12-11 11:14:36,437 - harpy.table.cell_clustering._weighted_channel_expression - INFO - Adding mean over obtained cell clusters '(metaclustering)' of the average marker expression for each cell weighted by pixel cluster count to '.uns[ 'metaclustering_channels' ]' of table layer 'table_cell_clustering_flowsom'\n", - "2024-12-11 11:14:36,438 - harpy.table.cell_clustering._weighted_channel_expression - INFO - Adding average marker expression for each cell weighted by pixel cluster count to '.obs' of table layer 'table_cell_clustering_flowsom'\n", - "/Users/arnedf/miniconda3/envs/harpy/lib/python3.10/site-packages/spatialdata/_core/_elements.py:116: UserWarning: Key `table_cell_clustering_flowsom` already exists. Overwriting it in-memory.\n", - " self._check_key(key, self.keys(), self._shared_keys)\n" - ] - }, - { - "data": { - "text/plain": [ - "SpatialData object\n", - "├── Images\n", - "│ ├── 'raw_image_fov0': DataArray[cyx] (22, 512, 512)\n", - "│ ├── 'raw_image_fov0_processed': DataArray[cyx] (16, 512, 512)\n", - "│ ├── 'raw_image_fov1': DataArray[cyx] (22, 1024, 1024)\n", - "│ └── 'raw_image_fov1_processed': DataArray[cyx] (16, 1024, 1024)\n", - "├── Labels\n", - "│ ├── 'label_nuclear_fov0': DataArray[yx] (512, 512)\n", - "│ ├── 'label_nuclear_fov1': DataArray[yx] (1024, 1024)\n", - "│ ├── 'label_whole_fov0': DataArray[yx] (512, 512)\n", - "│ ├── 'label_whole_fov1': DataArray[yx] (1024, 1024)\n", - "│ ├── 'raw_image_fov0_flowsom_clusters': DataArray[yx] (512, 512)\n", - "│ ├── 'raw_image_fov0_flowsom_metaclusters': DataArray[yx] (512, 512)\n", - "│ ├── 'raw_image_fov1_flowsom_clusters': DataArray[yx] (1024, 1024)\n", - "│ └── 'raw_image_fov1_flowsom_metaclusters': DataArray[yx] (1024, 1024)\n", - "└── Tables\n", - " ├── 'counts_clusters': AnnData (100, 16)\n", - " ├── 'table': AnnData (1414, 22)\n", - " └── 'table_cell_clustering_flowsom': AnnData (1409, 20)\n", - "with coordinate systems:\n", - " ▸ 'fov0', with elements:\n", - " raw_image_fov0 (Images), raw_image_fov0_processed (Images), label_nuclear_fov0 (Labels), label_whole_fov0 (Labels), raw_image_fov0_flowsom_clusters (Labels), raw_image_fov0_flowsom_metaclusters (Labels)\n", - " ▸ 'fov1', with elements:\n", - " raw_image_fov1 (Images), raw_image_fov1_processed (Images), label_nuclear_fov1 (Labels), label_whole_fov1 (Labels), raw_image_fov1_flowsom_clusters (Labels), raw_image_fov1_flowsom_metaclusters (Labels)" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# weighted channel average for visualization -> calculate this on the flowsom clustered matrix\n", "sdata_ark_analysis = hp.tb.weighted_channel_expression(\n", @@ -506,410 +272,13 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2024-12-11 11:14:36,543 - harpy.table.cell_clustering._utils - WARNING - Increasing cell cluster IDs (SOM cluster and meta cluster IDs) with +1 for visualization. The underlying dataframe in the SpatialData object remains unchanges.\n", - "2024-12-11 11:14:36,547 - harpy.table.cell_clustering._utils - WARNING - Increasing cell cluster IDs (SOM cluster and meta cluster IDs) with +1 for visualization. The underlying dataframe in the SpatialData object remains unchanges.\n", - "2024-12-11 11:14:36,550 - harpy.table.cell_clustering._utils - WARNING - Increasing cell cluster IDs (SOM cluster and meta cluster IDs) with +1 for visualization. The underlying dataframe in the SpatialData object remains unchanges.\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
channelsCD3CD4CD8CD14CD20CD31CD45CD68CD163CK17Collagen1FibronectinECADHLADRSMAVimpixel_meta_clusterpixel_som_clustercount
cells
1_counts_clusters_a34405d379.66220555.9912343.3399384.1083383.9763430.57864440.2463951.9071122.7544901.7334137.4019934.5661872.1533632.4621321.2294845.9934195111752
2_counts_clusters_a34405d344.35777678.5219971.9931996.0194382.2799560.57527129.5731882.6531704.1497421.8208659.8753896.7792302.4662043.4651801.8391199.828461529440
3_counts_clusters_a34405d360.86215717.3230387.6044556.9904122.8710161.42209621.4768513.5472636.2897043.32512819.66454312.0884497.1487163.9250605.01166812.711450437020
4_counts_clusters_a34405d313.08027511.2726895.6815797.3509617.3383592.76609579.5445183.1257686.2299593.22659710.63793811.2251323.9571286.4224672.36376612.7597841747375
5_counts_clusters_a34405d349.93838110.92194849.4067464.9596345.2087441.20017853.3612401.8728393.8098602.5369135.4828535.0990182.0562582.5751061.0563008.7670882511025
............................................................
96_counts_clusters_a34405d33.98103913.3230103.21122041.0659971.5247071.30187912.6727456.0056959.9318681.65469034.96505312.1880162.67817911.8580546.4032229.09159999615057
97_counts_clusters_a34405d32.2455299.5326622.11879662.2867250.7820660.6525489.3863543.8244798.6701701.04088641.99066911.3650511.0149637.7345253.3828134.56949599711901
98_counts_clusters_a34405d32.1915475.7924741.79925020.2172240.7842720.4659564.8386693.8318046.0034691.15757079.6236469.8863051.2228714.7358373.0831844.082930119815650
99_counts_clusters_a34405d32.0080142.8852562.4353224.1627900.7420800.3148102.0692281.8868252.1137931.124537101.7313248.5166211.0739641.2647902.6453042.994190119925505
100_counts_clusters_a34405d30.7702091.0024781.1193471.1188500.3023100.0872090.5791030.7721000.7376100.875762118.2018143.5223980.3575180.3785390.8371771.1178731110021010
\n", - "

100 rows × 19 columns

\n", - "
" - ], - "text/plain": [ - "channels CD3 CD4 CD8 CD14 \\\n", - "cells \n", - "1_counts_clusters_a34405d3 79.662205 55.991234 3.339938 4.108338 \n", - "2_counts_clusters_a34405d3 44.357776 78.521997 1.993199 6.019438 \n", - "3_counts_clusters_a34405d3 60.862157 17.323038 7.604455 6.990412 \n", - "4_counts_clusters_a34405d3 13.080275 11.272689 5.681579 7.350961 \n", - "5_counts_clusters_a34405d3 49.938381 10.921948 49.406746 4.959634 \n", - "... ... ... ... ... \n", - "96_counts_clusters_a34405d3 3.981039 13.323010 3.211220 41.065997 \n", - "97_counts_clusters_a34405d3 2.245529 9.532662 2.118796 62.286725 \n", - "98_counts_clusters_a34405d3 2.191547 5.792474 1.799250 20.217224 \n", - "99_counts_clusters_a34405d3 2.008014 2.885256 2.435322 4.162790 \n", - "100_counts_clusters_a34405d3 0.770209 1.002478 1.119347 1.118850 \n", - "\n", - "channels CD20 CD31 CD45 CD68 \\\n", - "cells \n", - "1_counts_clusters_a34405d3 3.976343 0.578644 40.246395 1.907112 \n", - "2_counts_clusters_a34405d3 2.279956 0.575271 29.573188 2.653170 \n", - "3_counts_clusters_a34405d3 2.871016 1.422096 21.476851 3.547263 \n", - "4_counts_clusters_a34405d3 7.338359 2.766095 79.544518 3.125768 \n", - "5_counts_clusters_a34405d3 5.208744 1.200178 53.361240 1.872839 \n", - "... ... ... ... ... \n", - "96_counts_clusters_a34405d3 1.524707 1.301879 12.672745 6.005695 \n", - "97_counts_clusters_a34405d3 0.782066 0.652548 9.386354 3.824479 \n", - "98_counts_clusters_a34405d3 0.784272 0.465956 4.838669 3.831804 \n", - "99_counts_clusters_a34405d3 0.742080 0.314810 2.069228 1.886825 \n", - "100_counts_clusters_a34405d3 0.302310 0.087209 0.579103 0.772100 \n", - "\n", - "channels CD163 CK17 Collagen1 Fibronectin \\\n", - "cells \n", - "1_counts_clusters_a34405d3 2.754490 1.733413 7.401993 4.566187 \n", - "2_counts_clusters_a34405d3 4.149742 1.820865 9.875389 6.779230 \n", - "3_counts_clusters_a34405d3 6.289704 3.325128 19.664543 12.088449 \n", - "4_counts_clusters_a34405d3 6.229959 3.226597 10.637938 11.225132 \n", - "5_counts_clusters_a34405d3 3.809860 2.536913 5.482853 5.099018 \n", - "... ... ... ... ... \n", - "96_counts_clusters_a34405d3 9.931868 1.654690 34.965053 12.188016 \n", - "97_counts_clusters_a34405d3 8.670170 1.040886 41.990669 11.365051 \n", - "98_counts_clusters_a34405d3 6.003469 1.157570 79.623646 9.886305 \n", - "99_counts_clusters_a34405d3 2.113793 1.124537 101.731324 8.516621 \n", - "100_counts_clusters_a34405d3 0.737610 0.875762 118.201814 3.522398 \n", - "\n", - "channels ECAD HLADR SMA Vim \\\n", - "cells \n", - "1_counts_clusters_a34405d3 2.153363 2.462132 1.229484 5.993419 \n", - "2_counts_clusters_a34405d3 2.466204 3.465180 1.839119 9.828461 \n", - "3_counts_clusters_a34405d3 7.148716 3.925060 5.011668 12.711450 \n", - "4_counts_clusters_a34405d3 3.957128 6.422467 2.363766 12.759784 \n", - "5_counts_clusters_a34405d3 2.056258 2.575106 1.056300 8.767088 \n", - "... ... ... ... ... \n", - "96_counts_clusters_a34405d3 2.678179 11.858054 6.403222 9.091599 \n", - "97_counts_clusters_a34405d3 1.014963 7.734525 3.382813 4.569495 \n", - "98_counts_clusters_a34405d3 1.222871 4.735837 3.083184 4.082930 \n", - "99_counts_clusters_a34405d3 1.073964 1.264790 2.645304 2.994190 \n", - "100_counts_clusters_a34405d3 0.357518 0.378539 0.837177 1.117873 \n", - "\n", - "channels pixel_meta_cluster pixel_som_cluster count \n", - "cells \n", - "1_counts_clusters_a34405d3 5 1 11752 \n", - "2_counts_clusters_a34405d3 5 2 9440 \n", - "3_counts_clusters_a34405d3 4 3 7020 \n", - "4_counts_clusters_a34405d3 17 4 7375 \n", - "5_counts_clusters_a34405d3 2 5 11025 \n", - "... ... ... ... \n", - "96_counts_clusters_a34405d3 9 96 15057 \n", - "97_counts_clusters_a34405d3 9 97 11901 \n", - "98_counts_clusters_a34405d3 11 98 15650 \n", - "99_counts_clusters_a34405d3 11 99 25505 \n", - "100_counts_clusters_a34405d3 11 100 21010 \n", - "\n", - "[100 rows x 19 columns]" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ + "from harpy.table.cell_clustering._utils import _export_to_ark_format as _export_to_ark_format_cells\n", + "from harpy.table.pixel_clustering._cluster_intensity import _export_to_ark_format as _export_to_ark_format_pixels\n", + "\n", "df = _export_to_ark_format_pixels(adata=sdata_ark_analysis[\"counts_clusters\"], output=None)\n", "(\n", " df_cell_som_cluster_count_avg,\n", @@ -921,592 +290,18 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
channelscell_meta_clusterCD3CD4CD8CD14CD20CD31CD45CD68CD163CK17Collagen1FibronectinECADHLADRSMAVimcell_meta_cluster_rename
019.92022410.6362199.05940911.9959576.4044757.60064119.9818695.3539487.5671235.64077717.39297313.3677315.7941826.9078279.51690719.2600041
129.96020715.3457976.05049421.2873345.2331852.11743417.0842386.75004113.7141622.03944920.46866916.1439884.0651609.3223426.87604014.5030862
2320.95320926.0713618.98669011.7716078.4423611.95371627.6055517.0857748.5612123.26342214.46056810.7520864.9824538.7786265.58444314.5497173
3414.59980619.1626536.73874410.32630231.6575972.13413336.2192444.0990549.2838072.69303610.2501668.8429644.35209011.5912152.72634911.2101594
452.6071733.2240161.5878354.0047211.0929020.6283333.4845412.1218402.6856960.9597128.1642925.4573383.2887602.2594383.1337043.8892965
563.95198713.0037553.17003543.0844931.7896851.11218010.43754524.98989619.4580821.39715015.6643309.8787382.17052911.5589545.50333512.9986756
675.57356310.6686883.37725818.1024512.5611744.9446199.9959035.5178639.3839941.85394718.18814814.2278813.3990897.0119849.89905138.2149067
7827.06944235.8053555.5296679.24874916.9941361.63979536.9935153.7753177.4408982.55070410.3088848.6154454.7472838.7329102.59358610.7430678
8915.02022319.0703276.53404110.17245322.0490042.58582730.7465784.1459858.1326002.86851413.84517511.2854776.53178011.3750263.93290512.9963949
9106.32578210.1420264.14083512.9778582.8430293.47775710.1211005.6957177.1053412.08598921.84403916.2934554.1205716.14369313.95161833.23246110
101113.09259917.9654364.8254968.51595042.5713541.53271640.9577573.4716408.1426022.6686368.2475257.3642003.82746411.6263551.7928679.97152111
111235.78846247.7326145.0963239.3897306.5809501.57419637.1139123.7736686.8617172.5237219.7913658.1841884.2918526.8761052.59327110.42227012
121311.26733322.1124054.43633011.2216898.5174991.82371826.4886455.3776607.8216492.31448511.74310210.0178285.97394332.9641574.12391515.39319113
13145.5999008.2174784.09311711.6153152.3935322.0727418.3199074.7406465.8629811.77194045.29646917.5104123.2730684.8446639.43458514.23737414
14154.07049010.4686842.95856620.7284783.0514261.5480278.16627965.66397615.8925511.36170912.5136477.1786382.12942810.4724264.07407014.43694715
151625.98939910.38939543.36190211.3204415.5028681.59610731.9017454.0479156.6490282.59300112.6308109.4293323.3320315.8506264.44114512.78087216
16176.17197514.1297114.45043232.9997712.9913841.44965114.67876910.30150216.2279851.97958919.92846713.7455643.30737513.0419657.40884914.39297217
17184.1606525.2332033.0016773.6942031.5804670.9390316.3756451.2748442.75388896.2535023.9033373.27162116.2860333.43060759.1064095.57244818
18195.4433195.7373144.5783006.7194221.9768352.2416706.2495172.9446136.88875117.2905417.7973425.84374763.0390984.54224911.7578544.43347019
192012.66180214.2158226.63249710.4451875.4045652.03555817.0399024.7573617.7334973.58779318.23366712.1622649.4569577.7591865.30106510.92408920
\n", - "
" - ], - "text/plain": [ - "channels cell_meta_cluster CD3 CD4 CD8 CD14 \\\n", - "0 1 9.920224 10.636219 9.059409 11.995957 \n", - "1 2 9.960207 15.345797 6.050494 21.287334 \n", - "2 3 20.953209 26.071361 8.986690 11.771607 \n", - "3 4 14.599806 19.162653 6.738744 10.326302 \n", - "4 5 2.607173 3.224016 1.587835 4.004721 \n", - "5 6 3.951987 13.003755 3.170035 43.084493 \n", - "6 7 5.573563 10.668688 3.377258 18.102451 \n", - "7 8 27.069442 35.805355 5.529667 9.248749 \n", - "8 9 15.020223 19.070327 6.534041 10.172453 \n", - "9 10 6.325782 10.142026 4.140835 12.977858 \n", - "10 11 13.092599 17.965436 4.825496 8.515950 \n", - "11 12 35.788462 47.732614 5.096323 9.389730 \n", - "12 13 11.267333 22.112405 4.436330 11.221689 \n", - "13 14 5.599900 8.217478 4.093117 11.615315 \n", - "14 15 4.070490 10.468684 2.958566 20.728478 \n", - "15 16 25.989399 10.389395 43.361902 11.320441 \n", - "16 17 6.171975 14.129711 4.450432 32.999771 \n", - "17 18 4.160652 5.233203 3.001677 3.694203 \n", - "18 19 5.443319 5.737314 4.578300 6.719422 \n", - "19 20 12.661802 14.215822 6.632497 10.445187 \n", - "\n", - "channels CD20 CD31 CD45 CD68 CD163 CK17 \\\n", - "0 6.404475 7.600641 19.981869 5.353948 7.567123 5.640777 \n", - "1 5.233185 2.117434 17.084238 6.750041 13.714162 2.039449 \n", - "2 8.442361 1.953716 27.605551 7.085774 8.561212 3.263422 \n", - "3 31.657597 2.134133 36.219244 4.099054 9.283807 2.693036 \n", - "4 1.092902 0.628333 3.484541 2.121840 2.685696 0.959712 \n", - "5 1.789685 1.112180 10.437545 24.989896 19.458082 1.397150 \n", - "6 2.561174 4.944619 9.995903 5.517863 9.383994 1.853947 \n", - "7 16.994136 1.639795 36.993515 3.775317 7.440898 2.550704 \n", - "8 22.049004 2.585827 30.746578 4.145985 8.132600 2.868514 \n", - "9 2.843029 3.477757 10.121100 5.695717 7.105341 2.085989 \n", - "10 42.571354 1.532716 40.957757 3.471640 8.142602 2.668636 \n", - "11 6.580950 1.574196 37.113912 3.773668 6.861717 2.523721 \n", - "12 8.517499 1.823718 26.488645 5.377660 7.821649 2.314485 \n", - "13 2.393532 2.072741 8.319907 4.740646 5.862981 1.771940 \n", - "14 3.051426 1.548027 8.166279 65.663976 15.892551 1.361709 \n", - "15 5.502868 1.596107 31.901745 4.047915 6.649028 2.593001 \n", - "16 2.991384 1.449651 14.678769 10.301502 16.227985 1.979589 \n", - "17 1.580467 0.939031 6.375645 1.274844 2.753888 96.253502 \n", - "18 1.976835 2.241670 6.249517 2.944613 6.888751 17.290541 \n", - "19 5.404565 2.035558 17.039902 4.757361 7.733497 3.587793 \n", - "\n", - "channels Collagen1 Fibronectin ECAD HLADR SMA Vim \\\n", - "0 17.392973 13.367731 5.794182 6.907827 9.516907 19.260004 \n", - "1 20.468669 16.143988 4.065160 9.322342 6.876040 14.503086 \n", - "2 14.460568 10.752086 4.982453 8.778626 5.584443 14.549717 \n", - "3 10.250166 8.842964 4.352090 11.591215 2.726349 11.210159 \n", - "4 8.164292 5.457338 3.288760 2.259438 3.133704 3.889296 \n", - "5 15.664330 9.878738 2.170529 11.558954 5.503335 12.998675 \n", - "6 18.188148 14.227881 3.399089 7.011984 9.899051 38.214906 \n", - "7 10.308884 8.615445 4.747283 8.732910 2.593586 10.743067 \n", - "8 13.845175 11.285477 6.531780 11.375026 3.932905 12.996394 \n", - "9 21.844039 16.293455 4.120571 6.143693 13.951618 33.232461 \n", - "10 8.247525 7.364200 3.827464 11.626355 1.792867 9.971521 \n", - "11 9.791365 8.184188 4.291852 6.876105 2.593271 10.422270 \n", - "12 11.743102 10.017828 5.973943 32.964157 4.123915 15.393191 \n", - "13 45.296469 17.510412 3.273068 4.844663 9.434585 14.237374 \n", - "14 12.513647 7.178638 2.129428 10.472426 4.074070 14.436947 \n", - "15 12.630810 9.429332 3.332031 5.850626 4.441145 12.780872 \n", - "16 19.928467 13.745564 3.307375 13.041965 7.408849 14.392972 \n", - "17 3.903337 3.271621 16.286033 3.430607 59.106409 5.572448 \n", - "18 7.797342 5.843747 63.039098 4.542249 11.757854 4.433470 \n", - "19 18.233667 12.162264 9.456957 7.759186 5.301065 10.924089 \n", - "\n", - "channels cell_meta_cluster_rename \n", - "0 1 \n", - "1 2 \n", - "2 3 \n", - "3 4 \n", - "4 5 \n", - "5 6 \n", - "6 7 \n", - "7 8 \n", - "8 9 \n", - "9 10 \n", - "10 11 \n", - "11 12 \n", - "12 13 \n", - "13 14 \n", - "14 15 \n", - "15 16 \n", - "16 17 \n", - "17 18 \n", - "18 19 \n", - "19 20 " - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df_cell_meta_cluster_channel_avg" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'region': ['label_whole_fov0', 'label_whole_fov1'],\n", - " 'region_key': 'fov_labels',\n", - " 'instance_key': 'cell_ID'}" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# \"table_cell_clustering_flowsom\" is annotated by segmentation masks, so they can be visualised using napari-spatialdata\n", "sdata_ark_analysis[ \"table_cell_clustering_flowsom\" ].uns[ \"spatialdata_attrs\" ]\n", diff --git a/src/harpy/_tests/test_plot/test_plot_flowsom.py b/src/harpy/_tests/test_plot/test_plot_flowsom.py new file mode 100644 index 0000000..6cb4bc6 --- /dev/null +++ b/src/harpy/_tests/test_plot/test_plot_flowsom.py @@ -0,0 +1,68 @@ +import importlib +import os + +import matplotlib +import pytest + +from harpy.plot._flowsom import pixel_clusters, pixel_clusters_heatmap + + +@pytest.mark.skipif(not importlib.util.find_spec("flowsom"), reason="requires the flowSOM library") +def test_plot_pixel_clusters(sdata_blobs, tmp_path): + from harpy.image.pixel_clustering._clustering import flowsom + from harpy.table.pixel_clustering._cluster_intensity import cluster_intensity + + matplotlib.use("Agg") + + img_layer = "blobs_image" + channels = ["lineage_0", "lineage_1", "lineage_5", "lineage_9"] + fraction = 0.1 + + sdata_blobs, fsom, mapping = flowsom( + sdata_blobs, + img_layer=[img_layer], + output_layer_clusters=[f"{img_layer}_clusters"], + output_layer_metaclusters=[f"{img_layer}_metaclusters"], + channels=channels, + fraction=fraction, + n_clusters=20, + random_state=100, + chunks=(1, 200, 200), + overwrite=True, + ) + + assert f"{img_layer}_clusters" in sdata_blobs.labels + assert f"{img_layer}_metaclusters" in sdata_blobs.labels + assert fsom.model._is_fitted + + pixel_clusters( + sdata_blobs, + labels_layer="blobs_image_metaclusters", + figsize=(10, 10), + coordinate_systems="global", + crd=(100, 300, 100, 300), + output=os.path.join(tmp_path, "pixel_clusters.png"), + ) + + sdata_blobs = cluster_intensity( + sdata_blobs, + mapping=mapping, + img_layer=[img_layer], + labels_layer=[f"{img_layer}_clusters"], + to_coordinate_system=["global"], + output_layer="counts_clusters", + chunks="auto", + overwrite=True, + ) + + for _metaclusters in [True, False]: + pixel_clusters_heatmap( + sdata_blobs, + table_layer="counts_clusters", + figsize=(30, 20), + fig_kwargs={"dpi": 100}, + linewidths=0.01, + metaclusters=_metaclusters, + z_score=True, + output=os.path.join(tmp_path, f"pixel_clusters_heatmap_{_metaclusters}.png"), + ) diff --git a/src/harpy/image/pixel_clustering/_preprocess.py b/src/harpy/image/pixel_clustering/_preprocess.py index 2a78dc1..775e1c4 100644 --- a/src/harpy/image/pixel_clustering/_preprocess.py +++ b/src/harpy/image/pixel_clustering/_preprocess.py @@ -1,9 +1,13 @@ from __future__ import annotations +import os +import shutil +import uuid from collections.abc import Iterable import dask.array as da import numpy as np +from dask import compute, persist from dask.array import Array from dask_image.ndfilters import gaussian_filter from spatialdata import SpatialData @@ -12,6 +16,9 @@ from harpy.image._image import _get_spatial_element, add_image_layer from harpy.image._normalize import _nonzero_nonnan_percentile, _nonzero_nonnan_percentile_axis_0 +from harpy.utils.pylogger import get_pylogger + +log = get_pylogger(__name__) def pixel_clustering_preprocess( @@ -26,6 +33,8 @@ def pixel_clustering_preprocess( norm_sum: bool = True, chunks: str | int | tuple[int, ...] | None = None, scale_factors: ScaleFactors_t | None = None, + cast_dtype: type | None = np.float32, + persist_intermediate: bool = True, overwrite: bool = False, ) -> SpatialData: """ @@ -59,6 +68,15 @@ def pixel_clustering_preprocess( Chunk sizes for processing. If provided as a tuple, it should contain chunk sizes for `c`, `(z)`, `y`, `x`. scale_factors Scale factors to apply for multiscale + persist_intermediate + If set to `True` will persist all preprocessed elements in `img_layer` in memory. + If the elements in `img_layer` are large, this could lead to increased ram usage. + Set to `False` to write to intermediate zarr store instead, which will reduce ram usage, but will increase computation time slightly. + Persist or writing to intermediate zarr store is needed, otherwise Dask will not be able to optimize the computation graph for the multiple `img_layer` use case. + Ignored if `sdata` is not backed by a zarr store, or if there is only one element in `img_layer`. + cast_dtype + Image data in `img_layer` will be casted to `dtype` before preprocessing starts. If set to None, and input image is of integer type, normalizations will lead to + data of type `numpy.float64` due to quantile normalizations, leading to increased memory usage. overwrite If `True`, overwrites existing data in `output_layer`. @@ -116,6 +134,8 @@ def pixel_clustering_preprocess( # add trivial z dimension for 2D case arr = arr[:, None, ...] to_squeeze = True + if cast_dtype is not None: + arr = arr.astype(cast_dtype) _arr_list.append(arr) if q is not None: @@ -126,6 +146,7 @@ def pixel_clustering_preprocess( results_arr_percentile.append(arr_percentile) arr_percentile = da.stack(results_arr_percentile, axis=0) arr_percentile_mean = da.mean(arr_percentile, axis=0) # mean over all images + # arr_percentile_mean has shape (#n_channels,) # for multiple img_layer, in ark one uses np.mean( arr_percentile ) as the percentile to normalize # 2) calculate norm sum percentile for img_layer @@ -191,14 +212,37 @@ def pixel_clustering_preprocess( if q_post is not None: arr_percentile_post_norm = da.stack(results_arr_percentile_post_norm, axis=0) - arr_percentile_post_norm_mean = da.mean(arr_percentile_post_norm, axis=0) + arr_percentile_post_norm_mean = da.mean( + arr_percentile_post_norm, axis=0 + ) # arr_percentil_post_mean is of shape (#n_channels,) # Now normalize each image layer by arr_percentile_post_norm and add to spatialdata object - for i in range(len(_arr_list)): - if q_post is not None: + if q_post is not None: + for i in range(len(_arr_list)): _arr_list[i] = _arr_list[i] / da.asarray(arr_percentile_post_norm_mean[..., None, None, None]) - # save the preprocessed images, in this way we get the preprocessed images from which we sample + # need to let dask do optimization of the computation graph in case there are multiple images + # otherwise will recaclulate whole preprocessing every time we add an image layer to the sdata zarr store + # should only do this if there is more than one image + clean_up = False + if len(_arr_list) > 1: + if sdata.is_backed() and not persist_intermediate: + clean_up = True + _uuid = uuid.uuid4() + for i, _arr in enumerate(_arr_list): + _intermediate_zarr_store = os.path.join(os.path.dirname(sdata.path), f"{i}_{_uuid}.zarr") + log.info(f"Preparing to write to intermediate zarr store {_intermediate_zarr_store}.") + _arr.to_zarr(_intermediate_zarr_store, compute=False) + # write to intermediate zarr store, and let dask optimize computation graph + compute(_arr_list) + # load them dask array back lazily + for i in range(len(_arr_list)): + _arr_list[i] = da.from_zarr(os.path.join(os.path.dirname(sdata.path), f"{i}_{_uuid}.zarr")) + else: + _arr_list = persist(*_arr_list) + + # save the preprocessed images, in this way we get the preprocessed images from which we sample + for i in range(len(_arr_list)): sdata = add_image_layer( sdata, arr=_arr_list[i].squeeze(1) if to_squeeze else _arr_list[i], @@ -209,6 +253,13 @@ def pixel_clustering_preprocess( overwrite=overwrite, ) + if clean_up: + # clean up the intermediate zarr store. + for i in range(len(_arr_list)): + _intermediate_zarr_store = os.path.join(os.path.dirname(sdata.path), f"{i}_{_uuid}.zarr") + log.info(f"Removing intermediate zarr store {_intermediate_zarr_store}") + shutil.rmtree(_intermediate_zarr_store) + return sdata diff --git a/src/harpy/io/_visium_hd.py b/src/harpy/io/_visium_hd.py index 845e0ad..15c7d9d 100644 --- a/src/harpy/io/_visium_hd.py +++ b/src/harpy/io/_visium_hd.py @@ -21,8 +21,6 @@ def visium_hd( Read *10x Genomics* Visium HD formatted dataset. Wrapper around `spatialdata.io.readers.visium_hd.visium_hd`, but with the resulting table annotated by a labels layer. - To use this function, please install `spatialdata_io` via this fork: https://github.com/ArneDefauw/spatialdata-io.git@visium_hd. - E.g. `pip install git+https://github.com/ArneDefauw/spatialdata-io.git@visium_hd`. .. seealso:: diff --git a/src/harpy/plot/__init__.py b/src/harpy/plot/__init__.py index 96e281c..2deeea3 100644 --- a/src/harpy/plot/__init__.py +++ b/src/harpy/plot/__init__.py @@ -4,6 +4,7 @@ from ._cluster_cleanliness import cluster_cleanliness from ._clustering import cluster from ._enrichment import nhood_enrichment +from ._flowsom import pixel_clusters, pixel_clusters_heatmap from ._plot import plot_image, plot_labels, plot_shapes from ._preprocess import preprocess_transcriptomics from ._qc_cells import plot_adata, ridgeplot_channel, ridgeplot_channel_sample diff --git a/src/harpy/plot/_flowsom.py b/src/harpy/plot/_flowsom.py new file mode 100644 index 0000000..8d65b89 --- /dev/null +++ b/src/harpy/plot/_flowsom.py @@ -0,0 +1,277 @@ +from __future__ import annotations + +import uuid +from collections.abc import Mapping +from pathlib import Path +from types import MappingProxyType +from typing import Any + +import anndata as ad +import dask.array as da +import matplotlib.pyplot as plt +import pandas as pd +import seaborn as sns +from scipy.cluster.hierarchy import dendrogram, ward +from scipy.sparse import csr_matrix +from scipy.stats import zscore +from sklearn.metrics.pairwise import cosine_similarity +from spatialdata import SpatialData, bounding_box_query +from spatialdata.models import TableModel + +from harpy.image._image import _get_spatial_element +from harpy.utils._keys import _INSTANCE_KEY, _REGION_KEY, ClusteringKey +from harpy.utils.pylogger import get_pylogger + +log = get_pylogger(__name__) + +try: + import spatialdata_plot # noqa: F401 + +except ImportError: + log.warning("'spatialdata-plot' not installed, to use 'harpy.pl.plot_pixel_clusters', please install this library.") + + +def pixel_clusters( + sdata: SpatialData, + labels_layer: str, + crd: tuple[int, int, int, int] | None = None, + to_coordinate_system: str = "global", + output: str | Path | None = None, + render_labels_kwargs: Mapping[str, Any] = MappingProxyType({}), # passed to pl.render_labels + **kwargs, # passed to pl.show() of spatialdata_plot +): + """ + Visualize spatial distribution of pixel clusters based on labels in a `SpatialData` object, obtained using `harpy.im.flowsom`. + + Parameters + ---------- + sdata + The input SpatialData object. + labels_layer + The layer in `sdata` containing labels used to identify clusters. + crd + The coordinates for the region of interest in the format `(xmin, xmax, ymin, ymax)`. If `None`, the entire image is considered, by default `None`. + to_coordinate_system + Coordinate system to plot. + output + The path to save the generated plot. If `None`, the plot will be displayed directly using `plt.show()`. + render_labels_kwargs + Additional keyword arguments passed to `sdata.pl.render_labels`, such as `cmap` or `alpha`. + **kwargs + Additional keyword arguments passed to `sdata.pl.render_labels.pl.show`, such as `dpi` or `title`. + + Returns + ------- + None + The function generates and displays or saves a spatial plot of pixel clusters. + + Raises + ------ + ValueError + If the cropped spatial element derived from `crd` is `None`. + + See Also + -------- + harpy.im.flowsom + """ + se = _get_spatial_element(sdata, layer=labels_layer) + + labels_layer_crop = None + if crd is not None: + se_crop = bounding_box_query( + se, + axes=["x", "y"], + min_coordinate=[crd[0], crd[2]], + max_coordinate=[crd[1], crd[3]], + target_coordinate_system=to_coordinate_system, + ) + if se_crop is not None: + labels_layer_crop = f"__labels_{uuid.uuid4()}__" + sdata[labels_layer_crop] = se_crop + se = se_crop + else: + raise ValueError(f"Cropped spatial element using crd '{crd}' is None.") + + unique_values = da.unique(se.data).compute() + labels = unique_values[unique_values != 0] + + cluster_ids = labels + + intermediate_table_key = f"__value_clusters__{uuid.uuid4()}" + + # create a dummy anndata object, so we can plot cluster ID's spatially using spatialdata plot + obs = pd.DataFrame({_INSTANCE_KEY: cluster_ids}, index=cluster_ids) + obs.index = obs.index.astype(str) # index needs to be str, otherwise anndata complains + + count_matrix = csr_matrix((labels.shape[0], 0)) + + adata = ad.AnnData(X=count_matrix, obs=obs) + adata.obs[_INSTANCE_KEY] = adata.obs[_INSTANCE_KEY].astype(int) + + adata.obs[_REGION_KEY] = labels_layer if labels_layer_crop is None else labels_layer_crop + adata.obs[_REGION_KEY] = adata.obs[_REGION_KEY].astype("category") + + adata = TableModel.parse( + adata=adata, + region=labels_layer if labels_layer_crop is None else labels_layer_crop, + region_key=_REGION_KEY, + instance_key=_INSTANCE_KEY, + ) + + sdata[intermediate_table_key] = adata + + sdata[intermediate_table_key].obs[f"{_INSTANCE_KEY}_cat"] = ( + sdata[intermediate_table_key].obs[_INSTANCE_KEY].astype("category") + ) + + ax = sdata.pl.render_labels( + labels_layer if labels_layer_crop is None else labels_layer_crop, + table_name=intermediate_table_key, + color=f"{_INSTANCE_KEY}_cat", + **render_labels_kwargs, + ).pl.show( + **kwargs, + return_ax=True, + ) + if output is not None: + ax.figure.savefig(output) + else: + plt.show() + plt.close(ax.figure) + + del sdata.tables[intermediate_table_key] + if labels_layer_crop is not None: + del sdata.labels[labels_layer_crop] + + +def pixel_clusters_heatmap( + sdata: SpatialData, + table_layer: str, # obtained via hp.tb.cluster_intensity + metaclusters: bool = True, + z_score: bool = True, + clip_value: float | None = 3, + output: str | Path | None = None, + figsize: tuple[int, int] = (20, 20), + fig_kwargs: Mapping[str, Any] = MappingProxyType({}), # kwargs passed to plt.figure, e.g. dpi + **kwargs, # kwargs passed to sns.heatmap +): + """ + Generate and visualize a heatmap of mean channel intensities for clusters or metaclusters. + + Parameters + ---------- + sdata + The input SpatialData object. + table_layer + The table layer in `sdata` containing cluster intensity for clusters and metaclusters, obtained via `hp.tb.cluster_intensity`. + metaclusters + Whether to display mean channel intensity per metacluster (`True`) or per cluster (`False`). + z_score + Whether to z-score the intensity values for normalization. We recommend setting this to `True`. + clip_value + The value to clip the z-scored data to, for better visualization. If `None`, no clipping is performed. + Ignored if `z_score` is `False`. + output + The path to save the generated heatmap. If `None`, the heatmap will be displayed directly using `plt.show()`. + figsize + Size of the figure in inches (width, height). + fig_kwargs + Additional keyword arguments passed to `plt.figure`, such as `dpi`. + **kwargs + Additional keyword arguments passed to `sns.heatmap`, such as `annot`, `cmap`, or `cbar_kws`. + + Returns + ------- + None + The function generates and displays or saves a heatmap. + + Notes + ----- + The heatmap shows mean channel intensities for either clusters or metaclusters, optionally normalized using z-scoring. + Clusters and metaclusters are ordered based on hierarchical clustering of their channel intensity profiles. + + The function uses cosine similarity to compute the distance matrix for hierarchical clustering of channels. + + Example + ------- + + >>> harpy.tb.pixel_clusters_heatmap( + ... sdata, + ... table_layer="counts_clusters", + ... figsize=(30, 20), + ... fig_kwargs={"dpi": 100}, + ... metaclusters=True, + ... z_score=True, + ... output="heatmap.png" + ... ) + + See Also + -------- + harpy.tb.cluster_intensity + """ + # clusters + df = sdata.tables[table_layer].to_df().copy() + df.index = sdata.tables[table_layer].obs[_INSTANCE_KEY].values + + # sort clusters by metaclusters + cluster_info = sdata.tables[table_layer].obs.copy() + cluster_info_sorted = cluster_info.sort_values([ClusteringKey._METACLUSTERING_KEY.value, _INSTANCE_KEY]) + cluster_info_sorted.index = cluster_info_sorted[_INSTANCE_KEY] + sorted_clusters = cluster_info_sorted.index.tolist() + df = df.loc[sorted_clusters, :] + + new_index_labels = [] + for cid in df.index: + mc_id = cluster_info_sorted.loc[cid, ClusteringKey._METACLUSTERING_KEY.value] + new_index_labels.append(f"{cid} ({mc_id})") + df.index = new_index_labels + + # metaclusters + df_metaclusters = sdata.tables[table_layer].uns[ClusteringKey._METACLUSTERING_KEY.value].copy() + df_metaclusters.index = df_metaclusters[ClusteringKey._METACLUSTERING_KEY.value] + df_metaclusters.drop(ClusteringKey._METACLUSTERING_KEY.value, axis=1, inplace=True) + + if z_score: + df = df.apply(zscore) + df_metaclusters = df_metaclusters.apply(zscore) + if clip_value is not None: + df = df.clip(lower=-clip_value, upper=clip_value) + df_metaclusters = df_metaclusters.clip(lower=-clip_value, upper=clip_value) + + # create dendogram to cluster channel names together that have similar features + # ( features are the intensity per metacluster here ) + dist_matrix = cosine_similarity(df_metaclusters.values.T) + linkage_matrix = ward(dist_matrix) + channel_names = df_metaclusters.columns + dendro_info = dendrogram(linkage_matrix, labels=channel_names, no_plot=True) + channel_order = dendro_info["ivl"] + # sort both metaclusters and clusters based on dendogram clustering results + df_metaclusters = df_metaclusters[channel_order] + df = df[channel_order] + + # Create a heatmap + plt.figure(figsize=figsize, **fig_kwargs) + annot = kwargs.pop("annot", False) + cmap = kwargs.pop("cmap", "coolwarm") + fmt = kwargs.pop("fmt", ".2f") + _label = "Mean Intensity (z-score)" if z_score else "Mean Intensity" + cbar_kws = kwargs.pop("cbar_kws", {"label": _label}) + sns.heatmap( + df_metaclusters.transpose() if metaclusters else df.transpose(), + annot=annot, + cmap=cmap, + fmt=fmt, + cbar_kws=cbar_kws, + **kwargs, + ) + _title = "metacluster" if metaclusters else "cluster" + plt.title(f"Mean Channel Intensity per {_title}") + plt.ylabel("Channel") + _x_label = "Metacluster ID" if metaclusters else "Cluster ID (Metacluster ID)" + plt.xlabel(_x_label) + + if output is None: + plt.show() + else: + plt.savefig(output, bbox_inches="tight") + plt.close() diff --git a/src/harpy/plot/_plot.py b/src/harpy/plot/_plot.py index 5849233..727204a 100644 --- a/src/harpy/plot/_plot.py +++ b/src/harpy/plot/_plot.py @@ -49,7 +49,7 @@ def plot_image( z_slice The z_slice to visualize in case of 3D (c,z,y,x) image. crd - The coordinates for the region of interest in the format (xmin, xmax, ymin, ymax). If None, the entire image is considered, by default None. + The coordinates for the region of interest in the format `(xmin, xmax, ymin, ymax)`. If `None`, the entire image is considered, by default `None`. to_coordinate_system Coordinate system to plot. output @@ -95,7 +95,7 @@ def plot_labels( z_slice The z_slice to visualize in case of 3D (c,z,y,x) labels. crd - The coordinates for the region of interest in the format (xmin, xmax, ymin, ymax). If None, the entire image is considered, by default None. + The coordinates for the region of interest in the format `(xmin, xmax, ymin, ymax)`. If `None`, the entire image is considered, by default `None`. to_coordinate_system Coordinate system to plot. output @@ -223,7 +223,7 @@ def plot_shapes( Column in the `shapes_layer` specifying the radius. The radius will be applied using `geometry.buffer` before plotting `shapes_layer`. Useful when the `geometry` of the `shapes_layer` contains points instead of polygons. crd - The coordinates for the region of interest in the format (xmin, xmax, ymin, ymax). If None, the entire image is considered, by default None. + The coordinates for the region of interest in the format `(xmin, xmax, ymin, ymax)`. If `None`, the entire image is considered, by default `None`. to_coordinate_system Coordinate system to plot. vmin @@ -473,7 +473,7 @@ def _plot( Column in the `shapes_layer` specifying the radius. The radius will be applied using `geometry.buffer` before plotting `shapes_layer`. Useful when the `geometry` of the `shapes_layer` contains points instead of polygons. crd - The coordinates for the region of interest in the format (xmin, xmax, ymin, ymax). If None, the entire image is considered, by default None. + The coordinates for the region of interest in the format `(xmin, xmax, ymin, ymax)`. If `None`, the entire image is considered, by default `None`. to_coordinate_system Coordinate system to plot. vmin diff --git a/src/harpy/table/pixel_clustering/_cluster_intensity.py b/src/harpy/table/pixel_clustering/_cluster_intensity.py index 2c61073..92251cc 100644 --- a/src/harpy/table/pixel_clustering/_cluster_intensity.py +++ b/src/harpy/table/pixel_clustering/_cluster_intensity.py @@ -110,6 +110,9 @@ def cluster_intensity( append = False else: append = True + log.info( + f"Start allocation of intensities of image layer with name '{_img_layer}' by labels in labels layer with name '{_labels_layer}'." + ) sdata = allocate_intensity( sdata, img_layer=_img_layer, @@ -121,7 +124,11 @@ def cluster_intensity( append=append, overwrite=overwrite, ) + log.info( + f"End allocation of image layer with name '{_img_layer}' and labels layer with name '{_labels_layer}'." + ) + log.info("Start preprocessing.") # for size normalization of cluster intensities sdata = preprocess_proteomics( sdata, @@ -134,6 +141,7 @@ def cluster_intensity( calculate_pca=False, overwrite=True, ) + log.info("End preprocessing.") # we are interested in the non-normalized counts (to account for multiple fov's) array = sdata.tables[output_layer].layers[_RAW_COUNTS_KEY]