diff --git a/dpctl/tensor/_copy_utils.py b/dpctl/tensor/_copy_utils.py index 565a11dec7..a1b154f4d1 100644 --- a/dpctl/tensor/_copy_utils.py +++ b/dpctl/tensor/_copy_utils.py @@ -246,6 +246,23 @@ def _broadcast_shapes(sh1, sh2): ).shape +def _broadcast_strides(X_shape, X_strides, res_ndim): + """ + Broadcasts strides to match the given dimensions; + returns tuple type strides. + """ + out_strides = [0] * res_ndim + X_shape_len = len(X_shape) + str_dim = -X_shape_len + for i in range(X_shape_len): + shape_value = X_shape[i] + if not shape_value == 1: + out_strides[str_dim] = X_strides[i] + str_dim += 1 + + return tuple(out_strides) + + def _copy_from_usm_ndarray_to_usm_ndarray(dst, src): if any( not isinstance(arg, dpt.usm_ndarray) @@ -268,7 +285,7 @@ def _copy_from_usm_ndarray_to_usm_ndarray(dst, src): except ValueError as exc: raise ValueError("Shapes of two arrays are not compatible") from exc - if dst.size < src.size: + if dst.size < src.size and dst.size < np.prod(common_shape): raise ValueError("Destination is smaller ") if len(common_shape) > dst.ndim: @@ -279,13 +296,33 @@ def _copy_from_usm_ndarray_to_usm_ndarray(dst, src): common_shape = common_shape[ones_count:] if src.ndim < len(common_shape): - new_src_strides = (0,) * (len(common_shape) - src.ndim) + src.strides + new_src_strides = _broadcast_strides( + src.shape, src.strides, len(common_shape) + ) + src_same_shape = dpt.usm_ndarray( + common_shape, dtype=src.dtype, buffer=src, strides=new_src_strides + ) + elif src.ndim == len(common_shape): + new_src_strides = _broadcast_strides( + src.shape, src.strides, len(common_shape) + ) src_same_shape = dpt.usm_ndarray( common_shape, dtype=src.dtype, buffer=src, strides=new_src_strides ) else: - src_same_shape = src - src_same_shape.shape = common_shape + # since broadcasting succeeded, src.ndim is greater because of + # leading sequence of ones, so we trim it + n = len(common_shape) + new_src_strides = _broadcast_strides( + src.shape[-n:], src.strides[-n:], n + ) + src_same_shape = dpt.usm_ndarray( + common_shape, + dtype=src.dtype, + buffer=src.usm_data, + strides=new_src_strides, + offset=src._element_offset, + ) _copy_same_shape(dst, src_same_shape) diff --git a/dpctl/tensor/_indexing_functions.py b/dpctl/tensor/_indexing_functions.py index d73a2e7952..188f3cd291 100644 --- a/dpctl/tensor/_indexing_functions.py +++ b/dpctl/tensor/_indexing_functions.py @@ -343,5 +343,5 @@ def nonzero(arr): "Expecting dpctl.tensor.usm_ndarray type, " f"got {type(arr)}" ) if arr.ndim == 0: - raise ValueError("Array of positive rank is exepcted") + raise ValueError("Array of positive rank is expected") return _nonzero_impl(arr) diff --git a/dpctl/tensor/_manipulation_functions.py b/dpctl/tensor/_manipulation_functions.py index 26c1ab60cf..9406e386af 100644 --- a/dpctl/tensor/_manipulation_functions.py +++ b/dpctl/tensor/_manipulation_functions.py @@ -25,6 +25,7 @@ import dpctl.tensor._tensor_impl as ti import dpctl.utils as dputils +from ._copy_utils import _broadcast_strides from ._type_utils import _to_device_supported_dtype __doc__ = ( @@ -120,23 +121,6 @@ def __repr__(self): return self._finfo.__repr__() -def _broadcast_strides(X_shape, X_strides, res_ndim): - """ - Broadcasts strides to match the given dimensions; - returns tuple type strides. - """ - out_strides = [0] * res_ndim - X_shape_len = len(X_shape) - str_dim = -X_shape_len - for i in range(X_shape_len): - shape_value = X_shape[i] - if not shape_value == 1: - out_strides[str_dim] = X_strides[i] - str_dim += 1 - - return tuple(out_strides) - - def _broadcast_shape_impl(shapes): if len(set(shapes)) == 1: return shapes[0] diff --git a/dpctl/tensor/_stride_utils.pxi b/dpctl/tensor/_stride_utils.pxi index ea59ec5402..bc7349a5e6 100644 --- a/dpctl/tensor/_stride_utils.pxi +++ b/dpctl/tensor/_stride_utils.pxi @@ -64,8 +64,7 @@ cdef int _from_input_shape_strides( cdef int j cdef bint all_incr = 1 cdef bint all_decr = 1 - cdef bint all_incr_modified = 0 - cdef bint all_decr_modified = 0 + cdef bint strides_inspected = 0 cdef Py_ssize_t elem_count = 1 cdef Py_ssize_t min_shift = 0 cdef Py_ssize_t max_shift = 0 @@ -167,15 +166,14 @@ cdef int _from_input_shape_strides( while (j < nd and shape_arr[j] == 1): j = j + 1 if j < nd: + strides_inspected = 1 if all_incr: - all_incr_modified = 1 all_incr = ( (strides_arr[i] > 0) and (strides_arr[j] > 0) and (strides_arr[i] <= strides_arr[j]) ) if all_decr: - all_decr_modified = 1 all_decr = ( (strides_arr[i] > 0) and (strides_arr[j] > 0) and @@ -183,11 +181,18 @@ cdef int _from_input_shape_strides( ) i = j else: + if not strides_inspected: + # all dimensions have size 1 except + # dimension 'i'. Array is both C and F + # contiguous + strides_inspected = 1 + all_incr = (strides_arr[i] == 1) + all_decr = all_incr break # should only set contig flags on actually obtained # values, rather than default values - all_incr = all_incr and all_incr_modified - all_decr = all_decr and all_decr_modified + all_incr = all_incr and strides_inspected + all_decr = all_decr and strides_inspected if all_incr and all_decr: contig[0] = (USM_ARRAY_C_CONTIGUOUS | USM_ARRAY_F_CONTIGUOUS) elif all_incr: diff --git a/dpctl/tensor/_usmarray.pyx b/dpctl/tensor/_usmarray.pyx index 20a226bd3e..98986d84d6 100644 --- a/dpctl/tensor/_usmarray.pyx +++ b/dpctl/tensor/_usmarray.pyx @@ -816,7 +816,7 @@ cdef class usm_ndarray: return _take_multi_index(res, adv_ind, adv_ind_start_p) - def to_device(self, target): + def to_device(self, target, stream=None): """ to_device(target_device) Transfers this array to specified target device. @@ -856,6 +856,14 @@ cdef class usm_ndarray: cdef c_dpctl.DPCTLSyclQueueRef QRef = NULL cdef c_dpmem._Memory arr_buf d = Device.create_device(target) + + if (stream is None or type(stream) is not dpctl.SyclQueue or + stream == self.sycl_queue): + pass + else: + ev = self.sycl_queue.submit_barrier() + stream.submit_barrier(dependent_events=[ev]) + if (d.sycl_context == self.sycl_context): arr_buf = self.usm_data QRef = ( d.sycl_queue).get_queue_ref() @@ -1167,8 +1175,6 @@ cdef class usm_ndarray: if adv_ind_start_p < 0: # basic slicing if isinstance(rhs, usm_ndarray): - if Xv.size == 0: - return _copy_from_usm_ndarray_to_usm_ndarray(Xv, rhs) else: if hasattr(rhs, "__sycl_usm_array_interface__"): diff --git a/dpctl/tests/test_usm_ndarray_ctor.py b/dpctl/tests/test_usm_ndarray_ctor.py index 50d8e2479a..4ab28db479 100644 --- a/dpctl/tests/test_usm_ndarray_ctor.py +++ b/dpctl/tests/test_usm_ndarray_ctor.py @@ -109,6 +109,25 @@ def test_usm_ndarray_flags(): x.flags["C"] = False +def test_usm_ndarray_flags_bug_gh_1334(): + get_queue_or_skip() + a = dpt.ones((2, 3), dtype="u4") + r = dpt.reshape(a, (1, 6, 1)) + assert r.flags["C"] and r.flags["F"] + + a = dpt.ones((2, 3), dtype="u4", order="F") + r = dpt.reshape(a, (1, 6, 1), order="F") + assert r.flags["C"] and r.flags["F"] + + a = dpt.ones((2, 3, 4), dtype="i8") + r = dpt.sum(a, axis=(1, 2), keepdims=True) + assert r.flags["C"] and r.flags["F"] + + a = dpt.ones((2, 1), dtype="?") + r = a[:, 1::-1] + assert r.flags["F"] and r.flags["C"] + + @pytest.mark.parametrize( "dtype", [ @@ -1012,6 +1031,53 @@ def test_setitem_same_dtype(dtype, src_usm_type, dst_usm_type): Zusm_empty[Ellipsis] = Zusm_3d[0, 0, 0:0] +def test_setitem_broadcasting(): + get_queue_or_skip() + dst = dpt.ones((2, 3, 4), dtype="u4") + src = dpt.zeros((3, 1), dtype=dst.dtype) + dst[...] = src + expected = np.zeros(dst.shape, dtype=dst.dtype) + assert np.array_equal(dpt.asnumpy(dst), expected) + + +def test_setitem_broadcasting_empty_dst_validation(): + "Broadcasting rules apply, except exception" + get_queue_or_skip() + dst = dpt.ones((2, 0, 5, 4), dtype="i8") + src = dpt.ones((2, 0, 3, 4), dtype="i8") + with pytest.raises(ValueError): + dst[...] = src + + +def test_setitem_broadcasting_empty_dst_edge_case(): + """RHS is shunken to empty array by + broadasting rule, hence no exception""" + get_queue_or_skip() + dst = dpt.ones(1, dtype="i8")[0:0] + src = dpt.ones(tuple(), dtype="i8") + dst[...] = src + + +def test_setitem_broadcasting_src_ndim_equal_dst_ndim(): + get_queue_or_skip() + dst = dpt.ones((2, 3, 4), dtype="i4") + src = dpt.zeros((2, 1, 4), dtype="i4") + dst[...] = src + + expected = np.zeros(dst.shape, dtype=dst.dtype) + assert np.array_equal(dpt.asnumpy(dst), expected) + + +def test_setitem_broadcasting_src_ndim_greater_than_dst_ndim(): + get_queue_or_skip() + dst = dpt.ones((2, 3, 4), dtype="i4") + src = dpt.zeros((1, 2, 1, 4), dtype="i4") + dst[...] = src + + expected = np.zeros(dst.shape, dtype=dst.dtype) + assert np.array_equal(dpt.asnumpy(dst), expected) + + @pytest.mark.parametrize( "dtype", _all_dtypes,