diff --git a/doc/basic.rst b/doc/basic.rst index fa0d57d0..096ba63c 100644 --- a/doc/basic.rst +++ b/doc/basic.rst @@ -9,3 +9,4 @@ In the following, you find a number of tutorials that demonstrate the basic capa customization 4dobc-creation 4dobc-analysis + 4dobc-customization diff --git a/jupyter/4dobc-customization.ipynb b/jupyter/4dobc-customization.ipynb new file mode 100644 index 00000000..acb9ce2e --- /dev/null +++ b/jupyter/4dobc-customization.ipynb @@ -0,0 +1,301 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a33e62fc-7d82-4142-b58f-7f85ae303957", + "metadata": {}, + "source": [ + "## 4D Object By Change - Customizing the algorithm" + ] + }, + { + "cell_type": "markdown", + "id": "745e09d3-f1a2-4dbe-a2f9-b17c8cf3295d", + "metadata": {}, + "source": [ + "The 4D-OBC implementation provided by `py4dgeo` can be customized similarly to [how M3C2 is customized](customization.ipynb). The fundamental idea is to create your own algorithm class derived from `py4dgeo`'s `RegionGrowingAlgorithm` class and override only those parts of the algorithm that you want to change. This notebook introduces the currently existing customization points." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "99cb21a9-be08-4aa8-ab66-421eec6f5879", + "metadata": {}, + "outputs": [], + "source": [ + "import py4dgeo\n", + "import numpy as np\n", + "\n", + "import py4dgeo._py4dgeo as _py4dgeo # The C++ bindings module for py4dgeo" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "81daa78f-6091-4e52-a115-c6ff1b0d6754", + "metadata": {}, + "outputs": [], + "source": [ + "# Load test data\n", + "py4dgeo.ensure_test_data_availability()\n", + "analysis = py4dgeo.SpatiotemporalAnalysis(\"synthetic.zip\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c02d630-8c4d-4afa-9e82-42c77f9f987e", + "metadata": {}, + "outputs": [], + "source": [ + "# We are disabling log messages on this tutorial to increase the readability of the output\n", + "import logging\n", + "\n", + "logging.disable()" + ] + }, + { + "cell_type": "markdown", + "id": "cca1bd7e-a448-4fb6-b164-ace459ff8e98", + "metadata": {}, + "source": [ + "### Distance Measure" + ] + }, + { + "cell_type": "markdown", + "id": "932d01fa-201b-40a7-bbeb-4f67b92d03b5", + "metadata": {}, + "source": [ + "By default, 4D-OBC uses a normalized Dynamic Time Warping distance measure. You can provide your own Python function, although all the same warnings as with M3C2 apply: Python code will be significantly slower compared to C++ implementations and will be run sequentially even if you are using OpenMP." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c694e747-f8ce-4db3-ba26-e5b0169e506b", + "metadata": {}, + "outputs": [], + "source": [ + "def custom_distance(params: _py4dgeo.TimeseriesDistanceFunctionData):\n", + " mask = ~np.isnan(params.ts1) & ~np.isnan(params.ts2)\n", + " if not np.any(mask):\n", + " return np.nan\n", + "\n", + " # Mask the two input arrays\n", + " masked_ts1 = params.ts1[mask]\n", + " masked_ts2 = params.ts2[mask]\n", + "\n", + " return np.sum(np.abs(masked_ts1 - masked_ts2)) / np.sum(\n", + " np.abs(np.maximum(masked_ts1, masked_ts2))\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f28014a0-b99a-4d35-8bc5-934551fa8a94", + "metadata": {}, + "outputs": [], + "source": [ + "class CustomDistance4DOBC(py4dgeo.RegionGrowingAlgorithm):\n", + " def distance_measure(self):\n", + " return custom_distance" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "187f9d30-3fb6-4cdb-95b4-13b84391648b", + "metadata": {}, + "outputs": [], + "source": [ + "analysis.invalidate_results()\n", + "objects = CustomDistance4DOBC(\n", + " neighborhood_radius=2.0,\n", + " seed_subsampling=20,\n", + ").run(analysis)" + ] + }, + { + "cell_type": "markdown", + "id": "a609c9fc-a693-477d-8317-ac7875697178", + "metadata": {}, + "source": [ + "The `params` data structure passed to the distance function contains the following fields:\n", + "* `ts1` and `ts2` are the time series to compare which may include `NaN` values\n", + "* `norm1` and `norm2` which are normalization factors" + ] + }, + { + "cell_type": "markdown", + "id": "1b351d11-c156-4dba-903e-3dadf411a299", + "metadata": {}, + "source": [ + "### Prioritizing seeds" + ] + }, + { + "cell_type": "markdown", + "id": "7b102773-4462-4686-91b2-ec155cad958a", + "metadata": {}, + "source": [ + "The 4D-OBC algorithm finds a number of seed locations for region growing and then prioritizes these seeds by sorting them according to a criterion. You can pass your own criterium like this:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8eee3c4b-39bb-43a4-b037-c9050678d136", + "metadata": {}, + "outputs": [], + "source": [ + "def sorting_criterion(seed):\n", + " # Choose a random score, resulting in random seed order\n", + " return np.random.rand()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "20c80f81-7500-4ef5-b63d-9b3ee9b17187", + "metadata": {}, + "outputs": [], + "source": [ + "class CustomSeedSorting(py4dgeo.RegionGrowingAlgorithm):\n", + " def seed_sorting_scorefunction(self):\n", + " return sorting_criterion" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e9630b3d-6057-405a-9e6a-0ee000041d8b", + "metadata": {}, + "outputs": [], + "source": [ + "analysis.invalidate_results()\n", + "objects = CustomSeedSorting(\n", + " neighborhood_radius=2.0,\n", + " seed_subsampling=20,\n", + ").run(analysis)" + ] + }, + { + "cell_type": "markdown", + "id": "a15d7a44-0447-4b66-a3b4-51c7532a2bca", + "metadata": {}, + "source": [ + "### Rejecting grown objects" + ] + }, + { + "cell_type": "markdown", + "id": "147d2bbf-5520-4ee8-92f7-2b31061ae068", + "metadata": {}, + "source": [ + "After the region growing is done, the algorithm class calls it method `filter_objects` to check whether the object should be used or discarded. The method must return `True` (keep) or `False` (discard):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f1df003-cc0a-4669-9cab-a52817961d38", + "metadata": {}, + "outputs": [], + "source": [ + "class RejectSmallObjects(py4dgeo.RegionGrowingAlgorithm):\n", + " def filter_objects(self, obj):\n", + " return len(obj.indices) > 10" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3b5bd9ca-671c-4f64-94d7-237677d5e1a5", + "metadata": {}, + "outputs": [], + "source": [ + "analysis.invalidate_results()\n", + "objects = RejectSmallObjects(\n", + " neighborhood_radius=2.0,\n", + " seed_subsampling=20,\n", + ").run(analysis)" + ] + }, + { + "cell_type": "markdown", + "id": "ec7eea12-c9c7-4b2f-9026-9206023d7b93", + "metadata": {}, + "source": [ + "### Seed point detection" + ] + }, + { + "cell_type": "markdown", + "id": "86e53d40-fc3b-4fb5-ab6e-04c58d58b451", + "metadata": {}, + "source": [ + "If you would like to run an entirely different algorithm to determine the seeds for region growing, you can do so by overriding `find_seedpoints`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3514be02-6e34-4b24-b5f2-acd7fa742879", + "metadata": {}, + "outputs": [], + "source": [ + "from py4dgeo.segmentation import RegionGrowingSeed" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ba272b94-e0a0-4870-b95c-87c574dc189a", + "metadata": {}, + "outputs": [], + "source": [ + "class DifferentSeeds(py4dgeo.RegionGrowingAlgorithm):\n", + " def find_seedpoints(self):\n", + " # Use one seed for corepoint 0 and the entire time interval\n", + " return [RegionGrowingSeed(0, 0, self.analysis.distances.shape[1] - 1)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d756a7e4-599a-4901-9ae6-0932cde00ad6", + "metadata": {}, + "outputs": [], + "source": [ + "analysis.invalidate_results()\n", + "objects = DifferentSeeds(\n", + " neighborhood_radius=2.0,\n", + " seed_subsampling=20,\n", + ").run(analysis)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/python/test_segmentation.py b/tests/python/test_segmentation.py index 1497e04b..af7dbc26 100644 --- a/tests/python/test_segmentation.py +++ b/tests/python/test_segmentation.py @@ -168,3 +168,31 @@ def test_change_point_detection_against_ruptures( assert len(rcp) == len(cpp) for r, c in zip(rcp, cpp): assert r == c + + +def test_custom_distance_function(analysis): + def custom_distance(params): + # NB: This is only a proof-of-concept how individual distance measures + # can be included into 4D-OBC. Reimplementing DTW distance calculation + # in Python is horribly slow. + mask = ~np.isnan(params.ts1) & ~np.isnan(params.ts2) + if not np.any(mask): + return np.nan + + # Mask the two input arrays + masked_ts1 = params.ts1[mask] + masked_ts2 = params.ts2[mask] + + return np.sum(np.abs(masked_ts1 - masked_ts2)) / np.sum( + np.abs(np.maximum(masked_ts1, masked_ts2)) + ) + + class Custom4DOBC(RegionGrowingAlgorithm): + """An implementation of 4D-OBC that makes use of Python fallback implementations""" + + def distance_measure(self): + return custom_distance + + # Run custom algorithm + algo = Custom4DOBC(neighborhood_radius=2.0, seed_subsampling=20) + objects = algo.run(analysis)