From a5292f6b98ca7bc03106dbac47aacf05473144f5 Mon Sep 17 00:00:00 2001 From: Ruth Comer <10599679+rcomer@users.noreply.github.com> Date: Thu, 1 Jun 2023 08:53:02 +0100 Subject: [PATCH 1/2] Simplify and lazify broadcast_to_shape (#5307) * working for all except masked lazy * use moveaxis * handle lazy masked case * add tests for is_lazy_masked_data * whatsnew * check compute isn't called * update docstring --- docs/src/whatsnew/3.6.rst | 6 +++ lib/iris/_lazy_data.py | 9 ++++ .../lazy_data/test_is_lazy_masked_data.py | 27 ++++++++++ .../unit/util/test_broadcast_to_shape.py | 28 ++++++++++ lib/iris/util.py | 54 +++++++++---------- 5 files changed, 94 insertions(+), 30 deletions(-) create mode 100644 lib/iris/tests/unit/lazy_data/test_is_lazy_masked_data.py diff --git a/docs/src/whatsnew/3.6.rst b/docs/src/whatsnew/3.6.rst index 136d47e15d..a943108215 100644 --- a/docs/src/whatsnew/3.6.rst +++ b/docs/src/whatsnew/3.6.rst @@ -70,6 +70,12 @@ This document explains the changes made to Iris for this release The patches in this release of Iris include: + ✨ **Features** + + #. `@rcomer`_ rewrote :func:`~iris.util.broadcast_to_shape` so it now handles + lazy data. This pull-request has been included to support :pull:`5341`. + (:pull:`5307`) [``pre-v3.7.0``] + 🐛 **Bugs Fixed** #. `@stephenworsley`_ fixed :meth:`~iris.cube.Cube.convert_units` to allow unit diff --git a/lib/iris/_lazy_data.py b/lib/iris/_lazy_data.py index e0566fc8f2..4c294a7d2f 100644 --- a/lib/iris/_lazy_data.py +++ b/lib/iris/_lazy_data.py @@ -47,6 +47,15 @@ def is_lazy_data(data): return result +def is_lazy_masked_data(data): + """ + Return True if the argument is both an Iris 'lazy' data array and the + underlying array is of masked type. Otherwise return False. + + """ + return is_lazy_data(data) and ma.isMA(da.utils.meta_from_array(data)) + + @lru_cache def _optimum_chunksize_internals( chunks, diff --git a/lib/iris/tests/unit/lazy_data/test_is_lazy_masked_data.py b/lib/iris/tests/unit/lazy_data/test_is_lazy_masked_data.py new file mode 100644 index 0000000000..4d627a706b --- /dev/null +++ b/lib/iris/tests/unit/lazy_data/test_is_lazy_masked_data.py @@ -0,0 +1,27 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the LGPL license. +# See COPYING and COPYING.LESSER in the root of the repository for full +# licensing details. +"""Test function :func:`iris._lazy data.is_lazy_masked_data`.""" + +import dask.array as da +import numpy as np +import pytest + +from iris._lazy_data import is_lazy_masked_data + +real_arrays = [ + np.arange(3), + np.ma.array(range(3)), + np.ma.array(range(3), mask=[0, 1, 1]), +] +lazy_arrays = [da.from_array(arr) for arr in real_arrays] + + +@pytest.mark.parametrize( + "arr, expected", zip(real_arrays + lazy_arrays, [False] * 4 + [True] * 2) +) +def test_is_lazy_masked_data(arr, expected): + result = is_lazy_masked_data(arr) + assert result is expected diff --git a/lib/iris/tests/unit/util/test_broadcast_to_shape.py b/lib/iris/tests/unit/util/test_broadcast_to_shape.py index 36f00fa53f..3df1634ba5 100644 --- a/lib/iris/tests/unit/util/test_broadcast_to_shape.py +++ b/lib/iris/tests/unit/util/test_broadcast_to_shape.py @@ -9,6 +9,10 @@ # importing anything else import iris.tests as tests # isort:skip +from unittest import mock + +import dask +import dask.array as da import numpy as np import numpy.ma as ma @@ -40,6 +44,17 @@ def test_added_dimensions_transpose(self): for j in range(4): self.assertArrayEqual(b[i, :, j, :].T, a) + @mock.patch.object(dask.base, "compute", wraps=dask.base.compute) + def test_lazy_added_dimensions_transpose(self, mocked_compute): + # adding dimensions and having the dimensions of the input + # transposed + a = da.random.random([2, 3]) + b = broadcast_to_shape(a, (5, 3, 4, 2), (3, 1)) + mocked_compute.assert_not_called() + for i in range(5): + for j in range(4): + self.assertArrayEqual(b[i, :, j, :].T.compute(), a.compute()) + def test_masked(self): # masked arrays are also accepted a = np.random.random([2, 3]) @@ -49,6 +64,19 @@ def test_masked(self): for j in range(4): self.assertMaskedArrayEqual(b[i, :, j, :].T, m) + @mock.patch.object(dask.base, "compute", wraps=dask.base.compute) + def test_lazy_masked(self, mocked_compute): + # masked arrays are also accepted + a = np.random.random([2, 3]) + m = da.ma.masked_array(a, mask=[[0, 1, 0], [0, 1, 1]]) + b = broadcast_to_shape(m, (5, 3, 4, 2), (3, 1)) + mocked_compute.assert_not_called() + for i in range(5): + for j in range(4): + self.assertMaskedArrayEqual( + b[i, :, j, :].compute().T, m.compute() + ) + def test_masked_degenerate(self): # masked arrays can have degenerate masks too a = np.random.random([2, 3]) diff --git a/lib/iris/util.py b/lib/iris/util.py index d96e0ee359..0b31ebdafc 100644 --- a/lib/iris/util.py +++ b/lib/iris/util.py @@ -23,7 +23,7 @@ import numpy.ma as ma from iris._deprecation import warn_deprecated -from iris._lazy_data import as_concrete_data, is_lazy_data +from iris._lazy_data import as_concrete_data, is_lazy_data, is_lazy_masked_data from iris.common import SERVICES from iris.common.lenient import _lenient_client import iris.exceptions @@ -34,8 +34,7 @@ def broadcast_to_shape(array, shape, dim_map): Broadcast an array to a given shape. Each dimension of the array must correspond to a dimension in the - given shape. Striding is used to repeat the array until it matches - the desired shape, returning repeated views on the original array. + given shape. The result is a read-only view (see :func:`numpy.broadcast_to`). If you need to write to the resulting array, make a copy first. Args: @@ -76,35 +75,30 @@ def broadcast_to_shape(array, shape, dim_map): See more at :doc:`/userguide/real_and_lazy_data`. """ - if len(dim_map) != array.ndim: - # We must check for this condition here because we cannot rely on - # getting an error from numpy if the dim_map argument is not the - # correct length, we might just get a segfault. - raise ValueError( - "dim_map must have an entry for every " - "dimension of the input array" - ) + n_orig_dims = len(array.shape) + n_new_dims = len(shape) - n_orig_dims + array = array.reshape(array.shape + (1,) * n_new_dims) + + # Get dims in required order. + array = np.moveaxis(array, range(n_orig_dims), dim_map) + new_array = np.broadcast_to(array, shape) - def _broadcast_helper(a): - strides = [0] * len(shape) - for idim, dim in enumerate(dim_map): - if shape[dim] != a.shape[idim]: - # We'll get garbage values if the dimensions of array are not - # those indicated by shape. - raise ValueError("shape and array are not compatible") - strides[dim] = a.strides[idim] - return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides) - - array_view = _broadcast_helper(array) - if ma.isMaskedArray(array): - if array.mask is ma.nomask: - # Degenerate masks can be applied as-is. - mask_view = array.mask + if ma.isMA(array): + # broadcast_to strips masks so we need to handle them explicitly. + mask = ma.getmask(array) + if mask is ma.nomask: + new_mask = ma.nomask else: - # Mask arrays need to be handled in the same way as the data array. - mask_view = _broadcast_helper(array.mask) - array_view = ma.array(array_view, mask=mask_view) - return array_view + new_mask = np.broadcast_to(mask, shape) + new_array = ma.array(new_array, mask=new_mask) + + elif is_lazy_masked_data(array): + # broadcast_to strips masks so we need to handle them explicitly. + mask = da.ma.getmaskarray(array) + new_mask = da.broadcast_to(mask, shape) + new_array = da.ma.masked_array(new_array, new_mask) + + return new_array def delta(ndarray, dimension, circular=False): From 8fa83d2185ab1f35ffcb71db578ed6e5a6bece76 Mon Sep 17 00:00:00 2001 From: Bill Little Date: Mon, 26 Jun 2023 12:48:25 +0100 Subject: [PATCH 2/2] add whatnew patch footnote --- docs/src/whatsnew/3.6.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/src/whatsnew/3.6.rst b/docs/src/whatsnew/3.6.rst index a943108215..c81307a7ba 100644 --- a/docs/src/whatsnew/3.6.rst +++ b/docs/src/whatsnew/3.6.rst @@ -96,6 +96,10 @@ This document explains the changes made to Iris for this release minimal in-core memory footprint. (:issue:`5115`, :pull:`5142`) + Note that, the above contribution labelled with ``pre-v3.7.0`` is part of the + forthcoming Iris ``v3.7.0`` release, but requires to be included in this patch + release. + 📢 Announcements ================