[DO NOT MERGE] PoC for the new design of `cuda::mr::{async_}resource_ref`

This adds support for the new `cuda::mr::{async_}resource_ref` design to the current `host_memory_resource` and `device_memory_resource`.

It is not perfect yet and is meant as a first step to gain experience with the design and start early experimentation.
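To make the intended usage concrete, here is a minimal sketch (not part of this commit, and assuming the `cuda::mr::resource_ref` interface from the libcudacxx branch referenced below): a function that accepts any device-accessible resource through the new non-owning reference instead of templating on the concrete resource type.

#include <cuda/memory_resource>
#include <rmm/mr/device/device_memory_resource.hpp>

// `make_buffer` is a hypothetical helper: after this change, every class derived
// from rmm::mr::device_memory_resource advertises cuda::mr::device_accessible
// and therefore binds to this reference.
void* make_buffer(cuda::mr::resource_ref<cuda::mr::device_accessible> ref, std::size_t bytes)
{
  return ref.allocate(bytes, 256);  // bytes + alignment, per the new interface
}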
miscco committed Sep 30, 2022
1 parent d212232 commit 777d0f0
Showing 10 changed files with 966 additions and 7 deletions.
11 changes: 9 additions & 2 deletions CMakeLists.txt
@@ -58,13 +58,16 @@ rapids_find_package(
rapids_cpm_init()
include(cmake/thirdparty/get_spdlog.cmake)
include(cmake/thirdparty/get_thrust.cmake)
include(cmake/thirdparty/get_libcudacxx.cmake)

# library targets
add_library(rmm INTERFACE)
add_library(rmm::rmm ALIAS rmm)

target_include_directories(rmm INTERFACE "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
"$<INSTALL_INTERFACE:include>")
target_include_directories(
rmm
INTERFACE "$<BUILD_INTERFACE:${LIBCUDACXX_INCLUDE_DIR}>"
"$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>" "$<INSTALL_INTERFACE:include>")

if(CUDA_STATIC_RUNTIME)
message(STATUS "RMM: Enabling static linking of cudart")
@@ -109,6 +112,10 @@ include(CPack)
# install export targets
install(TARGETS rmm EXPORT rmm-exports)
install(DIRECTORY include/rmm/ DESTINATION include/rmm)
install(
DIRECTORY ${RMM_GENERATED_INCLUDE_DIR}/include/libcxx
${RMM_GENERATED_INCLUDE_DIR}/include/libcudacxx
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rmm)
install(FILES ${RMM_BINARY_DIR}/include/rmm/version_config.hpp DESTINATION include/rmm)

set(doc_string
33 changes: 33 additions & 0 deletions cmake/thirdparty/get_libcudacxx.cmake
@@ -0,0 +1,33 @@
# =============================================================================
# Copyright (c) 2020, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under
# the License.
# =============================================================================

# Use CPM to find or clone libcudacxx
function(find_and_configure_libcudacxx VERSION)
rapids_cpm_find(
libcudacxx ${VERSION}
GIT_REPOSITORY https://github.com/miscco/libcudacxx.git
GIT_TAG memory_resource # ${VERSION}
GIT_SHALLOW TRUE DOWNLOAD_ONLY TRUE)

set(LIBCUDACXX_INCLUDE_DIR
"${libcudacxx_SOURCE_DIR}/include"
PARENT_SCOPE)
set(LIBCXX_INCLUDE_DIR
"${libcudacxx_SOURCE_DIR}/libcxx/include"
PARENT_SCOPE)
endfunction()

set(RMM_MIN_VERSION_libcudacxx 1.5.0)

find_and_configure_libcudacxx(${RMM_MIN_VERSION_libcudacxx})
6 changes: 6 additions & 0 deletions include/rmm/cuda_stream_view.hpp
@@ -23,6 +23,7 @@
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <cuda/stream_ref>

namespace rmm {

@@ -60,6 +61,11 @@ class cuda_stream_view {
* @brief Implicit conversion to cudaStream_t.
*/
constexpr operator cudaStream_t() const noexcept { return value(); }

/**
* @brief Implicit conversion to stream_ref.
*/
constexpr operator cuda::stream_ref() const noexcept { return value(); }

/**
* @brief Return true if the wrapped stream is the CUDA per-thread default stream.
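As a usage note, the new conversion lets a `rmm::cuda_stream_view` be passed directly to any API that takes a `cuda::stream_ref`. A minimal sketch, assuming `cuda::stream_ref::wait()` from the referenced libcudacxx branch (`wait_on` is a hypothetical helper):

#include <cuda/stream_ref>
#include <rmm/cuda_stream_view.hpp>

// Blocks the calling thread until all work submitted to the stream completes.
void wait_on(cuda::stream_ref stream) { stream.wait(); }

void example()
{
  rmm::cuda_stream_view view{};  // wraps the default (null) stream
  wait_on(view);                 // implicit conversion to cuda::stream_ref
}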
171 changes: 166 additions & 5 deletions include/rmm/mr/device/device_memory_resource.hpp
@@ -18,10 +18,16 @@
#include <rmm/cuda_stream_view.hpp>
#include <rmm/detail/aligned.hpp>

#include <cuda/memory_resource>

#include <cstddef>
#include <utility>

namespace rmm::mr {
/**
* @brief Tag type that enables the allocate_async interface
*/
struct supports_allocate_async {};

/**
* @brief Base class for all libcudf device memory allocation.
@@ -81,11 +87,11 @@
*/
class device_memory_resource {
public:
device_memory_resource() = default;
virtual ~device_memory_resource() = default;
device_memory_resource(device_memory_resource const&) = default;
device_memory_resource& operator=(device_memory_resource const&) = default;
device_memory_resource(device_memory_resource&&) noexcept = default;
device_memory_resource& operator=(device_memory_resource&&) noexcept = default;

/**
@@ -149,6 +155,158 @@ class device_memory_resource {
return do_is_equal(other);
}

/**
* @brief Allocates memory of size at least \p bytes.
*
* The returned pointer will have at minimum 256 byte alignment.
*
* This overload always performs the allocation on the null stream.
*
* @throws `rmm::bad_alloc` When the requested `bytes` cannot be allocated.
*
* @param bytes The size of the allocation
* @param alignment The expected alignment of the allocation
* @return void* Pointer to the newly allocated memory
*/
void* allocate(std::size_t bytes, std::size_t alignment)
{
return do_allocate(rmm::detail::align_up(bytes, alignment), cuda_stream_view{});
}

/**
* @brief Deallocate memory pointed to by \p ptr.
*
* `ptr` must have been returned by a prior call to `allocate(bytes, alignment)` on
* a `device_memory_resource` that compares equal to `*this`, and the storage
* it points to must not yet have been deallocated, otherwise behavior is
* undefined.
*
* This overload always performs the deallocation on the null stream.
*
* @throws Nothing.
*
* @param ptr Pointer to be deallocated
* @param bytes The size in bytes of the allocation. This must be equal to the
* value of `bytes` that was passed to the `allocate` call that returned `ptr`.
* @param alignment The alignment that was passed to the `allocate` call that returned `ptr`
*/
void deallocate(void* ptr, std::size_t bytes, std::size_t alignment)
{
do_deallocate(ptr, rmm::detail::align_up(bytes, alignment), cuda_stream_view{});
}

/**
* @brief Allocates memory of size at least \p bytes.
*
* The returned pointer will have at minimum 256 byte alignment.
*
* The allocation is performed on the supplied stream. This overload is only
* available when the resource advertises the `supports_allocate_async` property.
*
* @throws `rmm::bad_alloc` When the requested `bytes` cannot be allocated on
* the specified `stream`.
*
* @param bytes The size of the allocation
* @param alignment The expected alignment of the allocation
* @param stream Stream on which to perform allocation
* @return void* Pointer to the newly allocated memory
*/
template <class Res = device_memory_resource,
std::enable_if_t<cuda::mr::has_property<Res, supports_allocate_async>, int> = 0>
void* allocate_async(std::size_t bytes, std::size_t alignment, cuda_stream_view stream)
{
return do_allocate(rmm::detail::align_up(bytes, alignment), stream);
}

/**
* @brief Allocates memory of size at least \p bytes.
*
* The returned pointer will have at minimum 256 byte alignment.
*
* The allocation is performed on the supplied stream. This overload is only
* available when the resource advertises the `supports_allocate_async` property.
*
* @throws `rmm::bad_alloc` When the requested `bytes` cannot be allocated on
* the specified `stream`.
*
* @param bytes The size of the allocation
* @param stream Stream on which to perform allocation
* @return void* Pointer to the newly allocated memory
*/
template <class Res = device_memory_resource,
std::enable_if_t<cuda::mr::has_property<Res, supports_allocate_async>, int> = 0>
void* allocate_async(std::size_t bytes, cuda_stream_view stream)
{
return do_allocate(rmm::detail::align_up(bytes, allocation_size_alignment), stream);
}

/**
* @brief Deallocate memory pointed to by \p ptr.
*
* `ptr` must have been returned by a prior call to an `allocate` or `allocate_async`
* overload on a `device_memory_resource` that compares equal to `*this`, and the
* storage it points to must not yet have been deallocated, otherwise behavior is
* undefined.
*
* The deallocation is performed on the supplied stream. This overload is only
* available when the resource advertises the `supports_allocate_async` property.
*
* @throws Nothing.
*
* @param ptr Pointer to be deallocated
* @param bytes The size in bytes of the allocation. This must be equal to the
* value of `bytes` that was passed to the allocate call that returned `ptr`.
* @param alignment The alignment that was passed to the allocate call that returned `ptr`
* @param stream Stream on which to perform the deallocation
*/
template <class Res = device_memory_resource,
std::enable_if_t<cuda::mr::has_property<Res, supports_allocate_async>, int> = 0>
void deallocate_async(void* ptr,
std::size_t bytes,
std::size_t alignment,
cuda_stream_view stream)
{
do_deallocate(ptr, rmm::detail::align_up(bytes, alignment), stream);
}

/**
* @brief Deallocate memory pointed to by \p ptr.
*
* `ptr` must have been returned by a prior call to an `allocate` or `allocate_async`
* overload on a `device_memory_resource` that compares equal to `*this`, and the
* storage it points to must not yet have been deallocated, otherwise behavior is
* undefined.
*
* The deallocation is performed on the supplied stream. This overload is only
* available when the resource advertises the `supports_allocate_async` property.
*
* @throws Nothing.
*
* @param ptr Pointer to be deallocated
* @param bytes The size in bytes of the allocation. This must be equal to the
* value of `bytes` that was passed to the allocate call that returned `ptr`.
* @param stream Stream on which to perform the deallocation
*/
template <class Res = device_memory_resource,
std::enable_if_t<cuda::mr::has_property<Res, supports_allocate_async>, int> = 0>
void deallocate_async(void* ptr, std::size_t bytes, cuda_stream_view stream)
{
do_deallocate(ptr, rmm::detail::align_up(bytes, allocation_size_alignment), stream);
}

[[nodiscard]] bool operator==(device_memory_resource const& other) const noexcept
{
return do_is_equal(other);
}

[[nodiscard]] bool operator!=(device_memory_resource const& other) const noexcept
{
return !do_is_equal(other);
}

/**
* @brief Query whether the resource supports use of non-null CUDA streams for
* allocation/deallocation.
@@ -177,6 +335,8 @@ class device_memory_resource {
return do_get_mem_info(stream);
}

friend void get_property(device_memory_resource const&, cuda::mr::device_accessible) noexcept {}

private:
// All allocations are padded to a multiple of allocation_size_alignment bytes.
static constexpr auto allocation_size_alignment = std::size_t{8};
@@ -238,4 +398,5 @@ class device_memory_resource {
[[nodiscard]] virtual std::pair<std::size_t, std::size_t> do_get_mem_info(
cuda_stream_view stream) const = 0;
};
static_assert(cuda::mr::resource_with<device_memory_resource, cuda::mr::device_accessible>, "");
} // namespace rmm::mr
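The `supports_allocate_async` tag introduced in this file gates the `allocate_async`/`deallocate_async` overloads through `cuda::mr::has_property`. A minimal sketch of how a resource type would advertise the property (`my_resource` is hypothetical, not part of this commit):

#include <cuda/memory_resource>
#include <rmm/mr/device/device_memory_resource.hpp>

struct my_resource {
  // A get_property friend taking the tag type is what cuda::mr::has_property
  // detects; the *_async members above are SFINAE-guarded on exactly this check.
  friend void get_property(my_resource const&, rmm::mr::supports_allocate_async) noexcept {}
};
static_assert(cuda::mr::has_property<my_resource, rmm::mr::supports_allocate_async>, "");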
16 changes: 16 additions & 0 deletions include/rmm/mr/host/host_memory_resource.hpp
@@ -15,6 +15,8 @@
*/
#pragma once

#include <cuda/memory_resource>

#include <cstddef>
#include <utility>

@@ -107,6 +109,18 @@ class host_memory_resource {
{
return do_is_equal(other);
}

[[nodiscard]] bool operator==(host_memory_resource const& other) const noexcept
{
return do_is_equal(other);
}

[[nodiscard]] bool operator!=(host_memory_resource const& other) const noexcept
{
return !do_is_equal(other);
}

friend void get_property(host_memory_resource const&, cuda::mr::host_accessible) noexcept {}

private:
/**
@@ -160,4 +174,6 @@
return this == &other;
}
};
static_assert(cuda::mr::resource_with<host_memory_resource, cuda::mr::host_accessible>, "");

} // namespace rmm::mr
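With the `get_property` friend and the closing `static_assert` in place, a concrete host resource can be type-erased behind a non-owning reference. A minimal sketch, assuming the `cuda::mr::resource_ref` constructor and allocate/deallocate signatures from the referenced libcudacxx branch:

#include <cuda/memory_resource>
#include <rmm/mr/host/new_delete_resource.hpp>

void example()
{
  rmm::mr::new_delete_resource upstream;  // an existing RMM host resource
  cuda::mr::resource_ref<cuda::mr::host_accessible> ref{upstream};

  void* ptr = ref.allocate(256, 16);  // forwards to upstream.allocate(256, 16)
  ref.deallocate(ptr, 256, 16);
}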
6 changes: 6 additions & 0 deletions tests/CMakeLists.txt
@@ -94,6 +94,9 @@ endfunction()
# device mr tests
ConfigureTest(DEVICE_MR_TEST mr/device/mr_tests.cpp mr/device/mr_multithreaded_tests.cpp)

# device mr_ref tests
ConfigureTest(DEVICE_MR_REF_TEST mr/device/mr_ref_tests.cpp)

# general adaptor tests
ConfigureTest(ADAPTOR_TEST mr/device/adaptor_tests.cpp)

@@ -130,6 +133,9 @@ ConfigureTest(LIMITING_TEST mr/device/limiting_mr_tests.cpp)
# host mr tests
ConfigureTest(HOST_MR_TEST mr/host/mr_tests.cpp)

# host mr_ref tests
ConfigureTest(HOST_MR_REF_TEST mr/host/mr_ref_tests.cpp)

# cuda stream tests
ConfigureTest(CUDA_STREAM_TEST cuda_stream_tests.cpp cuda_stream_pool_tests.cpp)
