Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplify and lazify broadcast_to_shape (#5307) #5359

Merged
merged 2 commits into from
Jun 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions docs/src/whatsnew/3.6.rst
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,12 @@ This document explains the changes made to Iris for this release

The patches in this release of Iris include:

✨ **Features**

#. `@rcomer`_ rewrote :func:`~iris.util.broadcast_to_shape` so it now handles
lazy data. This pull-request has been included to support :pull:`5341`.
(:pull:`5307`) [``pre-v3.7.0``]

🐛 **Bugs Fixed**

#. `@stephenworsley`_ fixed :meth:`~iris.cube.Cube.convert_units` to allow unit
Expand All @@ -90,6 +96,10 @@ This document explains the changes made to Iris for this release
minimal in-core memory footprint.
(:issue:`5115`, :pull:`5142`)

Note that the above contribution labelled with ``pre-v3.7.0`` is part of the
forthcoming Iris ``v3.7.0`` release, but needs to be included in this patch
release.


📢 Announcements
================
Expand Down
9 changes: 9 additions & 0 deletions lib/iris/_lazy_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,15 @@ def is_lazy_data(data):
return result


def is_lazy_masked_data(data):
    """
    Return True if the argument is both an Iris 'lazy' data array and the
    underlying array is of masked type. Otherwise return False.

    The masked-ness test is made on the array that dask's
    ``meta_from_array`` reports for ``data``, not on ``data`` itself.

    """
    lazy = is_lazy_data(data)
    if not lazy:
        # Not lazy at all: hand back the (falsy) laziness result unchanged,
        # without asking dask for the array's meta.
        return lazy
    meta = da.utils.meta_from_array(data)
    return ma.isMA(meta)


@lru_cache
def _optimum_chunksize_internals(
chunks,
Expand Down
27 changes: 27 additions & 0 deletions lib/iris/tests/unit/lazy_data/test_is_lazy_masked_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Copyright Iris contributors
#
# This file is part of Iris and is released under the LGPL license.
# See COPYING and COPYING.LESSER in the root of the repository for full
# licensing details.
"""Test function :func:`iris._lazy_data.is_lazy_masked_data`."""

import dask.array as da
import numpy as np
import pytest

from iris._lazy_data import is_lazy_masked_data

real_arrays = [
np.arange(3),
np.ma.array(range(3)),
np.ma.array(range(3), mask=[0, 1, 1]),
]
lazy_arrays = [da.from_array(arr) for arr in real_arrays]


# The expected values line up with ``real_arrays + lazy_arrays``: the three
# real arrays and the lazy plain array give False, and only the two lazy
# arrays built from ``np.ma`` arrays give True.
@pytest.mark.parametrize(
    "arr, expected", zip(real_arrays + lazy_arrays, [False] * 4 + [True] * 2)
)
def test_is_lazy_masked_data(arr, expected):
    """Check the result is exactly the expected bool for each sample array."""
    assert is_lazy_masked_data(arr) is expected
28 changes: 28 additions & 0 deletions lib/iris/tests/unit/util/test_broadcast_to_shape.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
# importing anything else
import iris.tests as tests # isort:skip

from unittest import mock

import dask
import dask.array as da
import numpy as np
import numpy.ma as ma

Expand Down Expand Up @@ -40,6 +44,17 @@ def test_added_dimensions_transpose(self):
for j in range(4):
self.assertArrayEqual(b[i, :, j, :].T, a)

@mock.patch.object(dask.base, "compute", wraps=dask.base.compute)
def test_lazy_added_dimensions_transpose(self, mocked_compute):
    """Broadcast a lazy array, adding and transposing dims, without computing."""
    # Adding dimensions and having the dimensions of the input transposed.
    a = da.random.random([2, 3])
    b = broadcast_to_shape(a, (5, 3, 4, 2), (3, 1))
    # The broadcast itself must not have triggered any computation.
    mocked_compute.assert_not_called()
    # The reference values do not change between iterations, so realise
    # them once here instead of once per slice inside the loops.
    expected = a.compute()
    for i in range(5):
        for j in range(4):
            self.assertArrayEqual(b[i, :, j, :].T.compute(), expected)

def test_masked(self):
# masked arrays are also accepted
a = np.random.random([2, 3])
Expand All @@ -49,6 +64,19 @@ def test_masked(self):
for j in range(4):
self.assertMaskedArrayEqual(b[i, :, j, :].T, m)

@mock.patch.object(dask.base, "compute", wraps=dask.base.compute)
def test_lazy_masked(self, mocked_compute):
    """Broadcast a lazy masked array and check data and mask, without computing."""
    # Lazy masked arrays are also accepted.
    a = np.random.random([2, 3])
    m = da.ma.masked_array(a, mask=[[0, 1, 0], [0, 1, 1]])
    b = broadcast_to_shape(m, (5, 3, 4, 2), (3, 1))
    # The broadcast itself must not have triggered any computation.
    mocked_compute.assert_not_called()
    # The reference masked array does not change between iterations, so
    # realise it once here instead of once per slice inside the loops.
    expected = m.compute()
    for i in range(5):
        for j in range(4):
            self.assertMaskedArrayEqual(b[i, :, j, :].compute().T, expected)

def test_masked_degenerate(self):
# masked arrays can have degenerate masks too
a = np.random.random([2, 3])
Expand Down
54 changes: 24 additions & 30 deletions lib/iris/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import numpy.ma as ma

from iris._deprecation import warn_deprecated
from iris._lazy_data import as_concrete_data, is_lazy_data
from iris._lazy_data import as_concrete_data, is_lazy_data, is_lazy_masked_data
from iris.common import SERVICES
from iris.common.lenient import _lenient_client
import iris.exceptions
Expand All @@ -34,8 +34,7 @@ def broadcast_to_shape(array, shape, dim_map):
Broadcast an array to a given shape.

Each dimension of the array must correspond to a dimension in the
given shape. Striding is used to repeat the array until it matches
the desired shape, returning repeated views on the original array.
given shape. The result is a read-only view (see :func:`numpy.broadcast_to`).
If you need to write to the resulting array, make a copy first.

Args:
Expand Down Expand Up @@ -76,35 +75,30 @@ def broadcast_to_shape(array, shape, dim_map):
See more at :doc:`/userguide/real_and_lazy_data`.

"""
if len(dim_map) != array.ndim:
# We must check for this condition here because we cannot rely on
# getting an error from numpy if the dim_map argument is not the
# correct length, we might just get a segfault.
raise ValueError(
"dim_map must have an entry for every "
"dimension of the input array"
)
n_orig_dims = len(array.shape)
n_new_dims = len(shape) - n_orig_dims
array = array.reshape(array.shape + (1,) * n_new_dims)

# Get dims in required order.
array = np.moveaxis(array, range(n_orig_dims), dim_map)
new_array = np.broadcast_to(array, shape)

def _broadcast_helper(a):
strides = [0] * len(shape)
for idim, dim in enumerate(dim_map):
if shape[dim] != a.shape[idim]:
# We'll get garbage values if the dimensions of array are not
# those indicated by shape.
raise ValueError("shape and array are not compatible")
strides[dim] = a.strides[idim]
return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)

array_view = _broadcast_helper(array)
if ma.isMaskedArray(array):
if array.mask is ma.nomask:
# Degenerate masks can be applied as-is.
mask_view = array.mask
if ma.isMA(array):
# broadcast_to strips masks so we need to handle them explicitly.
mask = ma.getmask(array)
if mask is ma.nomask:
new_mask = ma.nomask
else:
# Mask arrays need to be handled in the same way as the data array.
mask_view = _broadcast_helper(array.mask)
array_view = ma.array(array_view, mask=mask_view)
return array_view
new_mask = np.broadcast_to(mask, shape)
new_array = ma.array(new_array, mask=new_mask)

elif is_lazy_masked_data(array):
# broadcast_to strips masks so we need to handle them explicitly.
mask = da.ma.getmaskarray(array)
new_mask = da.broadcast_to(mask, shape)
new_array = da.ma.masked_array(new_array, new_mask)

return new_array


def delta(ndarray, dimension, circular=False):
Expand Down