SciTools · trexfeathers · Jun 26, 2023 · Jun 1, 2023 · Jun 26, 2023
diff --git a/docs/src/whatsnew/3.6.rst b/docs/src/whatsnew/3.6.rst
@@ -70,6 +70,12 @@ This document explains the changes made to Iris for this release
 
    The patches in this release of Iris include:
 
+   ✨ **Features**
+
+   #. `@rcomer`_ rewrote :func:`~iris.util.broadcast_to_shape` so it now handles
+      lazy data. This pull-request has been included to support :pull:`5341`.
+      (:pull:`5307`) [``pre-v3.7.0``]
+
    🐛 **Bugs Fixed**
 
    #. `@stephenworsley`_ fixed :meth:`~iris.cube.Cube.convert_units` to allow unit
@@ -90,6 +96,10 @@ This document explains the changes made to Iris for this release
       minimal in-core memory footprint.
       (:issue:`5115`, :pull:`5142`)
 
+   Note that, the above contribution labelled with ``pre-v3.7.0`` is part of the
+   forthcoming Iris ``v3.7.0`` release, but requires to be included in this patch
+   release.
+
 
 📢 Announcements
 ================

diff --git a/lib/iris/_lazy_data.py b/lib/iris/_lazy_data.py
@@ -47,6 +47,15 @@ def is_lazy_data(data):
     return result
 
 
+def is_lazy_masked_data(data):
+    """
+    Return True if the argument is both an Iris 'lazy' data array and the
+    underlying array is of masked type.  Otherwise return False.
+
+    """
+    return is_lazy_data(data) and ma.isMA(da.utils.meta_from_array(data))
+
+
 @lru_cache
 def _optimum_chunksize_internals(
     chunks,

diff --git a/lib/iris/tests/unit/lazy_data/test_is_lazy_masked_data.py b/lib/iris/tests/unit/lazy_data/test_is_lazy_masked_data.py
@@ -0,0 +1,27 @@
+# Copyright Iris contributors
+#
+# This file is part of Iris and is released under the LGPL license.
+# See COPYING and COPYING.LESSER in the root of the repository for full
+# licensing details.
+"""Test function :func:`iris._lazy data.is_lazy_masked_data`."""
+
+import dask.array as da
+import numpy as np
+import pytest
+
+from iris._lazy_data import is_lazy_masked_data
+
+real_arrays = [
+    np.arange(3),
+    np.ma.array(range(3)),
+    np.ma.array(range(3), mask=[0, 1, 1]),
+]
+lazy_arrays = [da.from_array(arr) for arr in real_arrays]
+
+
+@pytest.mark.parametrize(
+    "arr, expected", zip(real_arrays + lazy_arrays, [False] * 4 + [True] * 2)
+)
+def test_is_lazy_masked_data(arr, expected):
+    result = is_lazy_masked_data(arr)
+    assert result is expected
diff --git a/lib/iris/tests/unit/util/test_broadcast_to_shape.py b/lib/iris/tests/unit/util/test_broadcast_to_shape.py
@@ -9,6 +9,10 @@
 # importing anything else
 import iris.tests as tests  # isort:skip
 
+from unittest import mock
+
+import dask
+import dask.array as da
 import numpy as np
 import numpy.ma as ma
 
@@ -40,6 +44,17 @@ def test_added_dimensions_transpose(self):
             for j in range(4):
                 self.assertArrayEqual(b[i, :, j, :].T, a)
 
+    @mock.patch.object(dask.base, "compute", wraps=dask.base.compute)
+    def test_lazy_added_dimensions_transpose(self, mocked_compute):
+        # adding dimensions and having the dimensions of the input
+        # transposed
+        a = da.random.random([2, 3])
+        b = broadcast_to_shape(a, (5, 3, 4, 2), (3, 1))
+        mocked_compute.assert_not_called()
+        for i in range(5):
+            for j in range(4):
+                self.assertArrayEqual(b[i, :, j, :].T.compute(), a.compute())
+
     def test_masked(self):
         # masked arrays are also accepted
         a = np.random.random([2, 3])
@@ -49,6 +64,19 @@ def test_masked(self):
             for j in range(4):
                 self.assertMaskedArrayEqual(b[i, :, j, :].T, m)
 
+    @mock.patch.object(dask.base, "compute", wraps=dask.base.compute)
+    def test_lazy_masked(self, mocked_compute):
+        # masked arrays are also accepted
+        a = np.random.random([2, 3])
+        m = da.ma.masked_array(a, mask=[[0, 1, 0], [0, 1, 1]])
+        b = broadcast_to_shape(m, (5, 3, 4, 2), (3, 1))
+        mocked_compute.assert_not_called()
+        for i in range(5):
+            for j in range(4):
+                self.assertMaskedArrayEqual(
+                    b[i, :, j, :].compute().T, m.compute()
+                )
+
     def test_masked_degenerate(self):
         # masked arrays can have degenerate masks too
         a = np.random.random([2, 3])

diff --git a/lib/iris/util.py b/lib/iris/util.py
@@ -23,7 +23,7 @@
 import numpy.ma as ma
 
 from iris._deprecation import warn_deprecated
-from iris._lazy_data import as_concrete_data, is_lazy_data
+from iris._lazy_data import as_concrete_data, is_lazy_data, is_lazy_masked_data
 from iris.common import SERVICES
 from iris.common.lenient import _lenient_client
 import iris.exceptions
@@ -34,8 +34,7 @@ def broadcast_to_shape(array, shape, dim_map):
     Broadcast an array to a given shape.
 
     Each dimension of the array must correspond to a dimension in the
-    given shape. Striding is used to repeat the array until it matches
-    the desired shape, returning repeated views on the original array.
+    given shape. The result is a read-only view (see :func:`numpy.broadcast_to`).
     If you need to write to the resulting array, make a copy first.
 
     Args:
@@ -76,35 +75,30 @@ def broadcast_to_shape(array, shape, dim_map):
     See more at :doc:`/userguide/real_and_lazy_data`.
 
     """
-    if len(dim_map) != array.ndim:
-        # We must check for this condition here because we cannot rely on
-        # getting an error from numpy if the dim_map argument is not the
-        # correct length, we might just get a segfault.
-        raise ValueError(
-            "dim_map must have an entry for every "
-            "dimension of the input array"
-        )
+    n_orig_dims = len(array.shape)
+    n_new_dims = len(shape) - n_orig_dims
+    array = array.reshape(array.shape + (1,) * n_new_dims)
+
+    # Get dims in required order.
+    array = np.moveaxis(array, range(n_orig_dims), dim_map)
+    new_array = np.broadcast_to(array, shape)
 
-    def _broadcast_helper(a):
-        strides = [0] * len(shape)
-        for idim, dim in enumerate(dim_map):
-            if shape[dim] != a.shape[idim]:
-                # We'll get garbage values if the dimensions of array are not
-                # those indicated by shape.
-                raise ValueError("shape and array are not compatible")
-            strides[dim] = a.strides[idim]
-        return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)
-
-    array_view = _broadcast_helper(array)
-    if ma.isMaskedArray(array):
-        if array.mask is ma.nomask:
-            # Degenerate masks can be applied as-is.
-            mask_view = array.mask
+    if ma.isMA(array):
+        # broadcast_to strips masks so we need to handle them explicitly.
+        mask = ma.getmask(array)
+        if mask is ma.nomask:
+            new_mask = ma.nomask
         else:
-            # Mask arrays need to be handled in the same way as the data array.
-            mask_view = _broadcast_helper(array.mask)
-        array_view = ma.array(array_view, mask=mask_view)
-    return array_view
+            new_mask = np.broadcast_to(mask, shape)
+        new_array = ma.array(new_array, mask=new_mask)
+
+    elif is_lazy_masked_data(array):
+        # broadcast_to strips masks so we need to handle them explicitly.
+        mask = da.ma.getmaskarray(array)
+        new_mask = da.broadcast_to(mask, shape)
+        new_array = da.ma.masked_array(new_array, new_mask)
+
+    return new_array
 
 
 def delta(ndarray, dimension, circular=False):