From 27f3ae2e4bf2f34b0a590342a6ac07d969a8088a Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Tue, 21 May 2024 15:31:12 +0200 Subject: [PATCH] Initial commit --- cpp/src/arrow/array/array_nested.cc | 27 +++++++++++++-------------- cpp/src/arrow/array/array_nested.h | 9 ++++++--- python/pyarrow/array.pxi | 11 ++++++++--- python/pyarrow/includes/libarrow.pxd | 8 ++++++-- 4 files changed, 33 insertions(+), 22 deletions(-) diff --git a/cpp/src/arrow/array/array_nested.cc b/cpp/src/arrow/array/array_nested.cc index 1be771d8228d9..ea34b39aa5aaf 100644 --- a/cpp/src/arrow/array/array_nested.cc +++ b/cpp/src/arrow/array/array_nested.cc @@ -807,7 +807,7 @@ MapArray::MapArray(const std::shared_ptr& type, int64_t length, Result> MapArray::FromArraysInternal( std::shared_ptr type, const std::shared_ptr& offsets, const std::shared_ptr& keys, const std::shared_ptr& items, - MemoryPool* pool) { + MemoryPool* pool, const std::shared_ptr& null_bitmap) { using offset_type = typename MapType::offset_type; using OffsetArrowType = typename CTypeTraits::ArrowType; @@ -829,31 +829,30 @@ Result> MapArray::FromArraysInternal( if (offsets->null_count() > 0) { ARROW_ASSIGN_OR_RAISE(auto buffers, - CleanListOffsets(NULLPTR, *offsets, pool)); + CleanListOffsets(null_bitmap, *offsets, pool)); return std::make_shared(type, offsets->length() - 1, std::move(buffers), keys, items, offsets->null_count(), 0); } using OffsetArrayType = typename TypeTraits::ArrayType; const auto& typed_offsets = checked_cast(*offsets); - auto buffers = BufferVector({nullptr, typed_offsets.values()}); + auto buffers = BufferVector({null_bitmap, typed_offsets.values()}); return std::make_shared(type, offsets->length() - 1, std::move(buffers), keys, items, /*null_count=*/0, offsets->offset()); } -Result> MapArray::FromArrays(const std::shared_ptr& offsets, - const std::shared_ptr& keys, - const std::shared_ptr& items, - MemoryPool* pool) { +Result> MapArray::FromArrays( + const std::shared_ptr& offsets, const 
std::shared_ptr& keys, + const std::shared_ptr& items, MemoryPool* pool, + const std::shared_ptr& null_bitmap) { return FromArraysInternal(std::make_shared(keys->type(), items->type()), - offsets, keys, items, pool); + offsets, keys, items, pool, null_bitmap); } -Result> MapArray::FromArrays(std::shared_ptr type, - const std::shared_ptr& offsets, - const std::shared_ptr& keys, - const std::shared_ptr& items, - MemoryPool* pool) { +Result> MapArray::FromArrays( + std::shared_ptr type, const std::shared_ptr& offsets, + const std::shared_ptr& keys, const std::shared_ptr& items, + MemoryPool* pool, const std::shared_ptr& null_bitmap) { if (type->id() != Type::MAP) { return Status::TypeError("Expected map type, got ", type->ToString()); } @@ -864,7 +863,7 @@ Result> MapArray::FromArrays(std::shared_ptr ty if (!map_type.item_type()->Equals(items->type())) { return Status::TypeError("Mismatching map items type"); } - return FromArraysInternal(std::move(type), offsets, keys, items, pool); + return FromArraysInternal(std::move(type), offsets, keys, items, pool, null_bitmap); } Status MapArray::ValidateChildData( diff --git a/cpp/src/arrow/array/array_nested.h b/cpp/src/arrow/array/array_nested.h index 5744f5fcadf05..f96b6bd3b1346 100644 --- a/cpp/src/arrow/array/array_nested.h +++ b/cpp/src/arrow/array/array_nested.h @@ -532,15 +532,18 @@ class ARROW_EXPORT MapArray : public ListArray { /// \param[in] keys Array containing key values /// \param[in] items Array containing item values /// \param[in] pool MemoryPool in case new offsets array needs to be /// allocated because of null values + /// \param[in] null_bitmap Optional validity bitmap static Result> FromArrays( const std::shared_ptr& offsets, const std::shared_ptr& keys, - const std::shared_ptr& items, MemoryPool* pool = default_memory_pool()); + const std::shared_ptr& items, MemoryPool* pool = default_memory_pool(), + const std::shared_ptr& null_bitmap = NULLPTR); static Result> FromArrays( std::shared_ptr type, const 
std::shared_ptr& offsets, const std::shared_ptr& keys, const std::shared_ptr& items, - MemoryPool* pool = default_memory_pool()); + MemoryPool* pool = default_memory_pool(), + const std::shared_ptr& null_bitmap = NULLPTR); const MapType* map_type() const { return map_type_; } @@ -560,7 +563,7 @@ class ARROW_EXPORT MapArray : public ListArray { static Result> FromArraysInternal( std::shared_ptr type, const std::shared_ptr& offsets, const std::shared_ptr& keys, const std::shared_ptr& items, - MemoryPool* pool); + MemoryPool* pool, const std::shared_ptr& null_bitmap = NULLPTR); private: const MapType* map_type_; diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 406830ad4dd69..3c26e85887466 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -3060,7 +3060,7 @@ cdef class MapArray(ListArray): """ @staticmethod - def from_arrays(offsets, keys, items, DataType type=None, MemoryPool pool=None): + def from_arrays(offsets, keys, items, DataType type=None, MemoryPool pool=None, mask=None): """ Construct MapArray from arrays of int32 offsets and key, item arrays. @@ -3072,6 +3072,8 @@ cdef class MapArray(ListArray): type : DataType, optional If not specified, a default MapArray with the keys' and items' type is used. pool : MemoryPool + mask : Array (boolean type), optional + Indicate which values are null (True) or not null (False). 
Returns ------- @@ -3153,24 +3155,27 @@ cdef class MapArray(ListArray): cdef: Array _offsets, _keys, _items shared_ptr[CArray] out + shared_ptr[CBuffer] c_mask cdef CMemoryPool* cpool = maybe_unbox_memory_pool(pool) _offsets = asarray(offsets, type='int32') _keys = asarray(keys) _items = asarray(items) + c_mask = c_mask_inverted_from_obj(mask, pool) + if type is not None: with nogil: out = GetResultValue( CMapArray.FromArraysAndType( type.sp_type, _offsets.sp_array, - _keys.sp_array, _items.sp_array, cpool)) + _keys.sp_array, _items.sp_array, cpool, c_mask)) else: with nogil: out = GetResultValue( CMapArray.FromArrays(_offsets.sp_array, _keys.sp_array, - _items.sp_array, cpool)) + _items.sp_array, cpool, c_mask)) cdef Array result = pyarrow_wrap_array(out) result.validate() return result diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index f461513e8b3cf..b9d316d6dfd57 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -788,7 +788,9 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: const shared_ptr[CArray]& offsets, const shared_ptr[CArray]& keys, const shared_ptr[CArray]& items, - CMemoryPool* pool) + CMemoryPool* pool, + const shared_ptr[CBuffer] null_bitmap, + ) @staticmethod CResult[shared_ptr[CArray]] FromArraysAndType" FromArrays"( @@ -796,7 +798,9 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: const shared_ptr[CArray]& offsets, const shared_ptr[CArray]& keys, const shared_ptr[CArray]& items, - CMemoryPool* pool) + CMemoryPool* pool, + const shared_ptr[CBuffer] null_bitmap, + ) shared_ptr[CArray] keys() shared_ptr[CArray] items()