Skip to content

Commit

Permalink
Implement cuda::uninitialized_buffer
Browse files Browse the repository at this point in the history
This `uninitialized_buffer` provides an allocation of `N` elements of type `T` utilizing a `cuda::mr::resource` to allocate the storage.

The buffer takes care of alignment and deallocation of the storage. The user is required to ensure that the lifetime of the memory resource exceeds the lifetime of the buffer.
  • Loading branch information
miscco committed Jun 14, 2024
1 parent 2361010 commit 41ee97a
Show file tree
Hide file tree
Showing 8 changed files with 367 additions and 0 deletions.
204 changes: 204 additions & 0 deletions cudax/include/cuda/experimental/__container/uninitialized_buffer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
//===----------------------------------------------------------------------===//
//
// Part of the CUDA Toolkit, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#ifndef __CUDAX__CONTAINERS_UNINITIALIZED_BUFFER_H
#define __CUDAX__CONTAINERS_UNINITIALIZED_BUFFER_H

#include <cuda/std/detail/__config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#include <cuda/__memory_resource/properties.h>
#include <cuda/__memory_resource/resource_ref.h>
#include <cuda/std/__concepts/_One_of.h>
#include <cuda/std/__memory/align.h>
#include <cuda/std/span>
#include <cuda/stream_ref>

#if _CCCL_STD_VER >= 2014 && !defined(_CCCL_COMPILER_MSVC_2017) \
&& defined(LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE)

//! @file The \c uninitialized_buffer class provides a typed buffer allocated from a given memory resource.
namespace cuda::experimental
{

//! @rst
//! .. _cudax-containers-uninitialized-buffer:
//!
//! Uninitialized type safe memory storage
//! ---------------------------------------
//!
//! ``uninitialized_buffer`` provides a typed buffer allocated from a given :ref:`memory resource
//! <libcudacxx-extended-api-memory-resources-resource>`. It handles alignment and release of the allocation.
//! The memory is uninitialized, so that a user needs to ensure elements are properly constructed.
//!
//! In addition to being type safe, ``uninitialized_buffer`` also takes a set of :ref:`properties
//! <libcudacxx-extended-api-memory-resources-properties>` to ensure that e.g. execution space constraints are checked
//! at compile time. However, we can only forward stateless properties. If a user wants to use a stateful one, then they
//! need to implement :ref:`get_property(const uninitialized_buffer&, Property)
//! <libcudacxx-extended-api-memory-resources-properties>`.
//!
//! .. note::
//!
//! ``uninitialized_buffer`` stores a reference to the provided :ref:`memory resource
//! <libcudacxx-extended-api-memory-resources-resource>`. It is the user's responsibility to ensure the lifetime of the
//! resource exceeds the lifetime of the buffer.
//!
//! @endrst
//! @tparam T the type to be stored in the buffer
//! @tparam Properties... The properties the allocated memory satisfies
template <class _Tp, class... _Properties>
class uninitialized_buffer
{
private:
  // Reference to the memory resource that allocated `__buf_` and that will be used to release it again.
  _CUDA_VMR::resource_ref<_Properties...> __mr_;
  // Number of elements of type `_Tp` the buffer was requested for.
  size_t __count_ = 0;
  // Start of the raw allocation as returned by the resource; `nullptr` when the buffer holds no storage.
  void* __buf_ = nullptr;

  //! @brief Determines the allocation size given the alignment and size of `T`
  //! @param __count The number of elements that should fit into the allocation
  //! @return `__count * sizeof(T)` rounded up to the next multiple of `alignof(T)`
  //! @note The multiplication is not checked for overflow.
  _CCCL_NODISCARD _CCCL_HOST_DEVICE static constexpr size_t __get_allocation_size(const size_t __count) noexcept
  {
    constexpr size_t __alignment = alignof(_Tp);
    // Classic power-of-two round-up: add `alignment - 1` and mask off the low bits
    return (__count * sizeof(_Tp) + (__alignment - 1)) & ~(__alignment - 1);
  }

  //! @brief Determines the properly aligned start of the buffer given the alignment and size of `T`
  //! @return The first address within the allocation that is suitably aligned for `T`. The result is `nullptr` if
  //! the buffer holds no storage or if `align` cannot fit `size() * sizeof(T)` bytes behind the aligned address.
  _CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr _Tp* __get_data() const noexcept
  {
    constexpr size_t __alignment = alignof(_Tp);
    // `align` modifies its pointer and space arguments, so operate on local copies to keep the members untouched
    size_t __space = __get_allocation_size(__count_);
    void* __ptr    = __buf_;
    return reinterpret_cast<_Tp*>(_CUDA_VSTD::align(__alignment, __count_ * sizeof(_Tp), __ptr, __space));
  }

public:
  using value_type = _Tp;
  using reference  = _Tp&;
  using pointer    = _Tp*;
  using size_type  = size_t;

  //! @brief Constructs a \c uninitialized_buffer, allocating sufficient storage for \p count elements through \p mr
  //! @param mr The memory resource to allocate the buffer with.
  //! @param count The desired size of the buffer.
  //! @note Depending on the alignment requirements of `T` the size of the underlying allocation might be larger
  //! than `count * sizeof(T)`. Only allocates memory when \p count > 0
  uninitialized_buffer(_CUDA_VMR::resource_ref<_Properties...> __mr, const size_t __count)
      : __mr_(__mr)
      , __count_(__count)
      // Members are initialized in declaration order, so `__mr_` and `__count_` are already valid here
      , __buf_(__count_ == 0 ? nullptr : __mr_.allocate(__get_allocation_size(__count_)))
  {}

  // The buffer uniquely owns its allocation, so it can be moved but not copied.
  uninitialized_buffer(const uninitialized_buffer&)            = delete;
  uninitialized_buffer& operator=(const uninitialized_buffer&) = delete;

  //! @brief Move construction
  //! @param other Another \c uninitialized_buffer
  //! @note Takes over the allocation of \p other and leaves \p other empty (`size() == 0`, no storage).
  uninitialized_buffer(uninitialized_buffer&& __other) noexcept
      : __mr_(__other.__mr_)
      , __count_(__other.__count_)
      , __buf_(__other.__buf_)
  {
    // Reset the source so that its destructor does not release the storage we just took over
    __other.__count_ = 0;
    __other.__buf_   = nullptr;
  }

  //! @brief Move assignment
  //! @param other Another \c uninitialized_buffer
  //! @note Releases the currently held allocation (if any), takes over the allocation of \p other and leaves
  //! \p other empty. Self-assignment is a no-op.
  uninitialized_buffer& operator=(uninitialized_buffer&& __other) noexcept
  {
    // Without this guard a self-move would release the live allocation and leave the buffer empty
    if (this == &__other)
    {
      return *this;
    }

    if (__buf_)
    {
      // Release with the old resource and the old size before both get overwritten below
      __mr_.deallocate(__buf_, __get_allocation_size(__count_));
    }
    __mr_            = __other.__mr_;
    __count_         = __other.__count_;
    __buf_           = __other.__buf_;
    __other.__count_ = 0;
    __other.__buf_   = nullptr;
    return *this;
  }

  //! @brief Destroys an \c uninitialized_buffer deallocating the buffer
  //! @warning The destructor does not destroy any objects that may or may not reside within the buffer. It is the
  //! user's responsibility to ensure that all objects within the buffer have been properly destroyed.
  ~uninitialized_buffer()
  {
    if (__buf_)
    {
      __mr_.deallocate(__buf_, __get_allocation_size(__count_));
    }
  }

  //! @brief Returns an aligned pointer to the buffer
  _CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr pointer begin() const noexcept
  {
    return __get_data();
  }

  //! @brief Returns an aligned pointer to end of the buffer
  _CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr pointer end() const noexcept
  {
    return __get_data() + __count_;
  }

  //! @brief Returns an aligned pointer to the buffer
  _CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr pointer data() const noexcept
  {
    return __get_data();
  }

  //! @brief Returns the size of the buffer
  //! @return The number of elements the buffer was created for, not the number of allocated bytes
  _CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr size_type size() const noexcept
  {
    return __count_;
  }

  //! @brief Swaps the contents with those of another \c uninitialized_buffer
  //! @param other The other \c uninitialized_buffer.
  //! @note The resource references are exchanged together with the storage, so each allocation is still released
  //! through the resource reference it was stored with.
  _CCCL_HOST_DEVICE constexpr void swap(uninitialized_buffer& __other) noexcept
  {
    _CUDA_VSTD::swap(__mr_, __other.__mr_);
    _CUDA_VSTD::swap(__count_, __other.__count_);
    _CUDA_VSTD::swap(__buf_, __other.__buf_);
  }

  //! @brief Returns the \c resource_ref used to allocate the buffer
  _CCCL_NODISCARD _CCCL_HOST_DEVICE _CUDA_VMR::resource_ref<_Properties...> resource() noexcept
  {
    return __mr_;
  }

  //! @brief Returns the \c resource_ref used to allocate the buffer
  _CCCL_NODISCARD _CCCL_HOST_DEVICE _CUDA_VMR::resource_ref<_Properties...> resource() const noexcept
  {
    return __mr_;
  }

#  ifndef DOXYGEN_SHOULD_SKIP_THIS // friend functions are currently broken
  //! @brief Forwards the passed Properties
  //! @note Only participates for properties that do not carry a value and that are listed in `_Properties...`.
  //! The function has an empty body and returns nothing.
  _LIBCUDACXX_TEMPLATE(class _Property)
  _LIBCUDACXX_REQUIRES((!property_with_value<_Property>) _LIBCUDACXX_AND _CUDA_VSTD::_One_of<_Property, _Properties...>)
  friend constexpr void get_property(const uninitialized_buffer&, _Property) noexcept {}
#  endif // DOXYGEN_SHOULD_SKIP_THIS
};

//! @brief Convenience alias for an \c uninitialized_buffer of `T` that passes \c device_accessible as its only
//! property.
//! @tparam T the type to be stored in the buffer
template <class _Tp>
using uninitialized_device_buffer = uninitialized_buffer<_Tp, _CUDA_VMR::device_accessible>;

} // namespace cuda::experimental

#endif // _CCCL_STD_VER >= 2014 && !_CCCL_COMPILER_MSVC_2017 && LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE

#endif //__CUDAX__CONTAINERS_UNINITIALIZED_BUFFER_H
25 changes: 25 additions & 0 deletions cudax/include/cuda/experimental/buffer
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
//===----------------------------------------------------------------------===//
//
// Part of the CUDA Toolkit, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#ifndef __CUDAX_BUFFER
#define __CUDAX_BUFFER

#include <cuda/std/detail/__config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#include <cuda/experimental/__container/uninitialized_buffer.h>

#endif // __CUDAX_BUFFER
3 changes: 3 additions & 0 deletions cudax/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ foreach(cn_target IN LISTS cudax_TARGETS)
memory_resource/cuda_memory_pool.cu
memory_resource/cuda_async_memory_resource.cu
)
cudax_add_catch2_test(test_target containers ${cn_target}
containers/uninitialized_buffer.cu
)

target_compile_options(${test_target} PRIVATE $<$<COMPILE_LANG_AND_ID:CUDA,NVIDIA>:--extended-lambda>)
endforeach()
98 changes: 98 additions & 0 deletions cudax/test/containers/uninitialized_buffer.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
//===----------------------------------------------------------------------===//
//
// Part of CUDA Experimental in CUDA C++ Core Libraries,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#include <cuda/experimental/buffer>
#include <cuda/memory_resource>
#include <cuda/std/cstdint>
#include <cuda/std/type_traits>
#include <cuda/std/utility>

#include <catch2/catch.hpp>

// Element type whose default constructor records a test failure. It is one of the types the test case below is
// instantiated with, so any element construction performed by the buffer shows up as a failed check.
struct do_not_construct
{
do_not_construct()
{
// Reaching this line means an object was default-constructed
CHECK(false);
}
};

// User-defined property that declares a `value_type`.
// NOTE(review): presumably the `value_type` member is what makes this a "property with value", which the buffer's
// own `get_property` friend excludes -- confirm against `property_with_value`.
struct my_property
{
using value_type = int;
};
// Overload supplying the value of `my_property` for a buffer of `int` declared with that property.
// Always returns 42; both arguments are only used for overload resolution.
constexpr int get_property(const cuda::experimental::uninitialized_buffer<int, my_property>&, my_property)
{
return 42;
}

TEMPLATE_TEST_CASE(
  "uninitialized_buffer", "[memory_resource]", char, short, int, long, long long, float, double, do_not_construct)
{
  // Buffer under test: no properties, element type supplied by the current template instantiation
  using buffer_t = cuda::experimental::uninitialized_buffer<TestType>;

  // The buffer is move-only and cannot be created without a resource and a count
  static_assert(!cuda::std::is_copy_assignable<buffer_t>::value, "");
  static_assert(!cuda::std::is_copy_constructible<buffer_t>::value, "");
  static_assert(!cuda::std::is_default_constructible<buffer_t>::value, "");

  cuda::mr::cuda_memory_resource resource{};

  SECTION("construction")
  {
    { // From a resource and an element count
      buffer_t counted{resource, 42};
      CHECK(counted.data() != nullptr);
      CHECK(counted.size() == 42);
    }
    { // From an rvalue: the storage must be handed over, not reallocated
      buffer_t source{resource, 42};
      const TestType* original_storage = source.data();

      buffer_t target{cuda::std::move(source)};
      CHECK(target.data() == original_storage);
      CHECK(target.size() == 42);

      // The moved-from buffer must no longer refer to the storage
      CHECK(source.data() == nullptr);
      CHECK(source.size() == 0);
    }
  }

  SECTION("access")
  {
    buffer_t storage{resource, 42};

    // Accessors on a mutable buffer
    CHECK(storage.data() != nullptr);
    CHECK(storage.size() == 42);
    CHECK(storage.begin() == storage.data());
    CHECK(storage.end() == storage.begin() + storage.size());
    CHECK(storage.resource() == resource);

    // The same accessors must be usable through a const reference
    CHECK(cuda::std::as_const(storage).data() != nullptr);
    CHECK(cuda::std::as_const(storage).size() == 42);
    CHECK(cuda::std::as_const(storage).begin() == storage.data());
    CHECK(cuda::std::as_const(storage).end() == storage.begin() + storage.size());
    CHECK(cuda::std::as_const(storage).resource() == resource);
  }

  SECTION("properties")
  {
    // A property with a value is provided by the free `get_property` overload declared above
    static_assert(cuda::has_property<cuda::experimental::uninitialized_buffer<int, my_property>, my_property>, "");
    // A property without a value is forwarded by the buffer itself
    static_assert(cuda::has_property<cuda::experimental::uninitialized_buffer<int, cuda::mr::device_accessible>,
                                     cuda::mr::device_accessible>,
                  "");
  }

  SECTION("convertion to span")
  {
    buffer_t storage{resource, 42};
    const cuda::std::span<TestType> view{storage};
    CHECK(view.data() == storage.data());
    CHECK(view.size() == 42);
  }
}
27 changes: 27 additions & 0 deletions docs/cudax/container.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
.. _cudax-containers:

Containers library
===================

The headers of the container library provide facilities to store elements on the heap. They are heavily inspired by the
C++ `containers library <https://en.cppreference.com/w/cpp/container>`__ but deviate from the standard provided ones due to different requirements from
heterogeneous systems.

They build upon :ref:`memory_resources <libcudacxx-extended-api-memory-resources>` to ensure that e.g. execution space
annotations are checked by the type system.

Uninitialized buffers
---------------------

The ``<cuda/experimental/buffer>`` header contains facilities that provide *heterogeneous* allocations to store objects
in uninitialized memory. This is a common request in HPC due to the high cost of initialization of large arrays.

.. warning::

It is the user's responsibility to ensure that any object is properly initialized before it is used and also destroyed
before the underlying storage is deallocated.

.. toctree::
:maxdepth: 3

container/uninitialized_buffer
5 changes: 5 additions & 0 deletions docs/cudax/container/uninitialized_buffer.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
uninitialized_buffer
====================

.. doxygenclass:: cuda::experimental::uninitialized_buffer
:members:
1 change: 1 addition & 0 deletions docs/cudax/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ CUDA Experimental
:maxdepth: 3

memory_resource
container
${repo_docs_api_path}/cudax_api

``CUDA Experimental`` (``cudax``) provides experimental new features that are still in development and subject to change.
Expand Down
4 changes: 4 additions & 0 deletions docs/repo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,7 @@ deps = [
]

doxygen_input = [
"../../cudax/include/cuda/experimental/__container/*.h",
"../../cudax/include/cuda/experimental/__memory_resource/*.h",
]

Expand All @@ -305,6 +306,9 @@ doxygen_predefined = [
"_CCCL_NODISCARD=[[nodiscard]]",
"_CCCL_NODISCARD_FRIEND=",
"_CCCL_STD_VER=2020",
"_CUDA_VMR=cuda::mr",
"_CUDA_VSTD=cuda::std",
"LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE=",
"_LIBCUDACXX_EAT_REST(x)=",
"_LIBCUDACXX_TRAILING_REQUIRES(x)=-> x _LIBCUDACXX_EAT_REST",
]
Expand Down

0 comments on commit 41ee97a

Please sign in to comment.