From 777d0f05cefc615045ae50b18c7a95b3ca2edd0a Mon Sep 17 00:00:00 2001 From: Michael Schellenberger Costa Date: Fri, 30 Sep 2022 10:44:56 +0200 Subject: [PATCH] [DO NOT MERGE] PoC for the new design of `cuda::mr::{async_}resource_ref` This adds support of the current `host_memory_resource` and `device_memory_resource` for the new design of `cuda::mr::{async_}resource_ref` It is not perfect yet and meant as a first step to gain experience with the design and start early experimentation --- CMakeLists.txt | 11 +- cmake/thirdparty/get_libcudacxx.cmake | 33 ++ include/rmm/cuda_stream_view.hpp | 6 + .../rmm/mr/device/device_memory_resource.hpp | 171 +++++++++- include/rmm/mr/host/host_memory_resource.hpp | 16 + tests/CMakeLists.txt | 6 + tests/mr/device/mr_ref_test.hpp | 295 ++++++++++++++++++ tests/mr/device/mr_ref_tests.cpp | 169 ++++++++++ tests/mr/host/mr_ref_tests.cpp | 262 ++++++++++++++++ tests/mr/host/mr_tests.cpp | 4 + 10 files changed, 966 insertions(+), 7 deletions(-) create mode 100644 cmake/thirdparty/get_libcudacxx.cmake create mode 100644 tests/mr/device/mr_ref_test.hpp create mode 100644 tests/mr/device/mr_ref_tests.cpp create mode 100644 tests/mr/host/mr_ref_tests.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 23cde1de6..7aeb14063 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -58,13 +58,16 @@ rapids_find_package( rapids_cpm_init() include(cmake/thirdparty/get_spdlog.cmake) include(cmake/thirdparty/get_thrust.cmake) +include(cmake/thirdparty/get_libcudacxx.cmake) # library targets add_library(rmm INTERFACE) add_library(rmm::rmm ALIAS rmm) -target_include_directories(rmm INTERFACE "$" - "$") +target_include_directories( + rmm + INTERFACE "$" + "$" "$") if(CUDA_STATIC_RUNTIME) message(STATUS "RMM: Enabling static linking of cudart") @@ -109,6 +112,10 @@ include(CPack) # install export targets install(TARGETS rmm EXPORT rmm-exports) install(DIRECTORY include/rmm/ DESTINATION include/rmm) +install( + DIRECTORY ${RMM_GENERATED_INCLUDE_DIR}/include/libcxx + ${RMM_GENERATED_INCLUDE_DIR}/include/libcudacxx + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rmm) install(FILES ${RMM_BINARY_DIR}/include/rmm/version_config.hpp DESTINATION include/rmm) set(doc_string diff --git a/cmake/thirdparty/get_libcudacxx.cmake b/cmake/thirdparty/get_libcudacxx.cmake new file mode 100644 index 000000000..d5dd00ad0 --- /dev/null +++ b/cmake/thirdparty/get_libcudacxx.cmake @@ -0,0 +1,33 @@ +# ============================================================================= +# Copyright (c) 2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +# Use CPM to find or clone libcudacxx +function(find_and_configure_libcudacxx VERSION) + rapids_cpm_find( + libcudacxx ${VERSION} + GIT_REPOSITORY https://github.com/miscco/libcudacxx.git + GIT_TAG memory_resource # ${VERSION} + GIT_SHALLOW TRUE DOWNLOAD_ONLY TRUE) + + set(LIBCUDACXX_INCLUDE_DIR + "${libcudacxx_SOURCE_DIR}/include" + PARENT_SCOPE) + set(LIBCXX_INCLUDE_DIR + "${libcudacxx_SOURCE_DIR}/libcxx/include" + PARENT_SCOPE) +endfunction() + +set(RMM_MIN_VERSION_libcudacxx 1.5.0) + +find_and_configure_libcudacxx(${RMM_MIN_VERSION_libcudacxx}) \ No newline at end of file diff --git a/include/rmm/cuda_stream_view.hpp b/include/rmm/cuda_stream_view.hpp index f913609f9..29cd9063f 100644 --- a/include/rmm/cuda_stream_view.hpp +++ b/include/rmm/cuda_stream_view.hpp @@ -23,6 +23,7 @@ #include #include #include +#include namespace rmm { @@ -60,6 +61,11 @@ class cuda_stream_view { * @brief Implicit conversion to cudaStream_t. */ constexpr operator cudaStream_t() const noexcept { return value(); } + + /** + * @brief Implicit conversion to stream_ref. + */ + constexpr operator cuda::stream_ref() const noexcept { return value(); } /** * @brief Return true if the wrapped stream is the CUDA per-thread default stream. diff --git a/include/rmm/mr/device/device_memory_resource.hpp b/include/rmm/mr/device/device_memory_resource.hpp index 52aa8c79f..4ffb315f2 100644 --- a/include/rmm/mr/device/device_memory_resource.hpp +++ b/include/rmm/mr/device/device_memory_resource.hpp @@ -18,10 +18,16 @@ #include #include +#include + #include #include namespace rmm::mr { +/** + * @brief Tag type that enables the allocate_async interface + */ +struct supports_allocate_async {}; /** * @brief Base class for all libcudf device memory allocation. @@ -81,11 +87,11 @@ namespace rmm::mr { */ class device_memory_resource { public: - device_memory_resource() = default; - virtual ~device_memory_resource() = default; - device_memory_resource(device_memory_resource const&) = default; - device_memory_resource& operator=(device_memory_resource const&) = default; - device_memory_resource(device_memory_resource&&) noexcept = default; + device_memory_resource() = default; + virtual ~device_memory_resource() = default; + device_memory_resource(device_memory_resource const&) = default; + device_memory_resource& operator=(device_memory_resource const&) = default; + device_memory_resource(device_memory_resource&&) noexcept = default; device_memory_resource& operator=(device_memory_resource&&) noexcept = default; /** @@ -149,6 +155,158 @@ class device_memory_resource { return do_is_equal(other); } + /** + * @brief Allocates memory of size at least \p bytes. + * + * The returned pointer will have at minimum 256 byte alignment. + * + * If supported, this operation may optionally be executed on a stream. + * Otherwise, the stream is ignored and the null stream is used. + * + * @throws `rmm::bad_alloc` When the requested `bytes` cannot be allocated on + * the specified `stream`. + * + * @param bytes The size of the allocation + * @param alignment The expected alignment of the allocation + * @return void* Pointer to the newly allocated memory + */ + void* allocate(std::size_t bytes, std::size_t alignment) + { + return do_allocate(rmm::detail::align_up(bytes, alignment), cuda_stream_view{}); + } + + /** + * @brief Deallocate memory pointed to by \p p. + * + * `p` must have been returned by a prior call to `allocate(bytes,stream)` on + * a `device_memory_resource` that compares equal to `*this`, and the storage + * it points to must not yet have been deallocated, otherwise behavior is + * undefined. + * + * If supported, this operation may optionally be executed on a stream. + * Otherwise, the stream is ignored and the null stream is used. + * + * @throws Nothing. + * + * @param p Pointer to be deallocated + * @param bytes The size in bytes of the allocation. This must be equal to the + * value of `bytes` that was passed to the `allocate` call that returned `p`. + * @param alignment The alignment that was passed to the `allocate` call that returned `p` + */ + void deallocate(void* ptr, std::size_t bytes, std::size_t alignment) + { + do_deallocate(ptr, rmm::detail::align_up(bytes, alignment), cuda_stream_view{}); + } + + /** + * @brief Allocates memory of size at least \p bytes. + * + * The returned pointer will have at minimum 256 byte alignment. + * + * If supported, this operation may optionally be executed on a stream. + * Otherwise, the stream is ignored and the null stream is used. + * + * @throws `rmm::bad_alloc` When the requested `bytes` cannot be allocated on + * the specified `stream`. + * + * @param bytes The size of the allocation + * @param alignment The expected alignment of the allocation + * @param stream Stream on which to perform allocation + * @return void* Pointer to the newly allocated memory + */ + template , int> = 0> + void* allocate_async(std::size_t bytes, std::size_t alignment, cuda_stream_view stream) + { + return do_allocate(rmm::detail::align_up(bytes, alignment), stream); + } + + /** + * @brief Allocates memory of size at least \p bytes. + * + * The returned pointer will have at minimum 256 byte alignment. + * + * If supported, this operation may optionally be executed on a stream. + * Otherwise, the stream is ignored and the null stream is used. + * + * @throws `rmm::bad_alloc` When the requested `bytes` cannot be allocated on + * the specified `stream`. + * + * @param bytes The size of the allocation + * @param stream Stream on which to perform allocation + * @return void* Pointer to the newly allocated memory + */ + template , int> = 0> + void* allocate_async(std::size_t bytes, cuda_stream_view stream) + { + return do_allocate(rmm::detail::align_up(bytes, allocation_size_alignment), stream); + } + + /** + * @brief Deallocate memory pointed to by \p p. + * + * `p` must have been returned by a prior call to `allocate(bytes,stream)` on + * a `device_memory_resource` that compares equal to `*this`, and the storage + * it points to must not yet have been deallocated, otherwise behavior is + * undefined. + * + * If supported, this operation may optionally be executed on a stream. + * Otherwise, the stream is ignored and the null stream is used. + * + * @throws Nothing. + * + * @param p Pointer to be deallocated + * @param bytes The size in bytes of the allocation. This must be equal to the + * value of `bytes` that was passed to the `allocate` call that returned `p`. + * @param alignment The alignment that was passed to the `allocate` call that returned `p` + * @param stream Stream on which to perform allocation + */ + template , int> = 0> + void deallocate_async(void* ptr, + std::size_t bytes, + std::size_t alignment, + cuda_stream_view stream) + { + do_deallocate(ptr, rmm::detail::align_up(bytes, alignment), stream); + } + + /** + * @brief Deallocate memory pointed to by \p p. + * + * `p` must have been returned by a prior call to `allocate(bytes,stream)` on + * a `device_memory_resource` that compares equal to `*this`, and the storage + * it points to must not yet have been deallocated, otherwise behavior is + * undefined. + * + * If supported, this operation may optionally be executed on a stream. + * Otherwise, the stream is ignored and the null stream is used. + * + * @throws Nothing. + * + * @param p Pointer to be deallocated + * @param bytes The size in bytes of the allocation. This must be equal to the + * value of `bytes` that was passed to the `allocate` call that returned `p`. + * @param stream Stream on which to perform allocation + */ + template , int> = 0> + void deallocate_async(void* ptr, std::size_t bytes, cuda_stream_view stream) + { + do_deallocate(ptr, rmm::detail::align_up(bytes, allocation_size_alignment), stream); + } + + [[nodiscard]] bool operator==(device_memory_resource const& other) const noexcept + { + return do_is_equal(other); + } + + [[nodiscard]] bool operator!=(device_memory_resource const& other) const noexcept + { + return !do_is_equal(other); + } + /** * @brief Query whether the resource supports use of non-null CUDA streams for * allocation/deallocation. @@ -177,6 +335,8 @@ class device_memory_resource { return do_get_mem_info(stream); } + friend void get_property(device_memory_resource const&, cuda::mr::device_accessible) noexcept {} + private: // All allocations are padded to a multiple of allocation_size_alignment bytes. static constexpr auto allocation_size_alignment = std::size_t{8}; @@ -238,4 +398,5 @@ class device_memory_resource { [[nodiscard]] virtual std::pair do_get_mem_info( cuda_stream_view stream) const = 0; }; +static_assert(cuda::mr::resource_with, ""); } // namespace rmm::mr diff --git a/include/rmm/mr/host/host_memory_resource.hpp b/include/rmm/mr/host/host_memory_resource.hpp index 4edffc860..40da70ff8 100644 --- a/include/rmm/mr/host/host_memory_resource.hpp +++ b/include/rmm/mr/host/host_memory_resource.hpp @@ -15,6 +15,8 @@ */ #pragma once +#include + #include #include @@ -107,6 +109,18 @@ class host_memory_resource { { return do_is_equal(other); } + + [[nodiscard]] bool operator==(host_memory_resource const& other) const noexcept + { + return do_is_equal(other); + } + + [[nodiscard]] bool operator!=(host_memory_resource const& other) const noexcept + { + return !do_is_equal(other); + } + + friend void get_property(host_memory_resource const&, cuda::mr::host_accessible) noexcept {} private: /** @@ -160,4 +174,6 @@ class host_memory_resource { return this == &other; } }; +static_assert(cuda::mr::resource_with, ""); + } // namespace rmm::mr diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 8f68141fd..971327a4c 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -94,6 +94,9 @@ endfunction() # device mr tests ConfigureTest(DEVICE_MR_TEST mr/device/mr_tests.cpp mr/device/mr_multithreaded_tests.cpp) +# device mr tests +ConfigureTest(DEVICE_MR_REF_TEST mr/device/mr_ref_tests.cpp) + # general adaptor tests ConfigureTest(ADAPTOR_TEST mr/device/adaptor_tests.cpp) @@ -130,6 +133,9 @@ ConfigureTest(LIMITING_TEST mr/device/limiting_mr_tests.cpp) # host mr tests ConfigureTest(HOST_MR_TEST mr/host/mr_tests.cpp) +# host mr_ref tests +ConfigureTest(HOST_MR_REF_TEST mr/host/mr_ref_tests.cpp) + # cuda stream tests ConfigureTest(CUDA_STREAM_TEST cuda_stream_tests.cpp cuda_stream_pool_tests.cpp) diff --git a/tests/mr/device/mr_ref_test.hpp b/tests/mr/device/mr_ref_test.hpp new file mode 100644 index 000000000..7071583b7 --- /dev/null +++ b/tests/mr/device/mr_ref_test.hpp @@ -0,0 +1,295 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "../../byte_literals.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include + +#include +#include +#include +#include +#include + +namespace rmm::test { + +/** + * @brief Returns if a pointer points to a device memory or managed memory + * allocation. + */ +inline bool is_device_memory(void* ptr) +{ + cudaPointerAttributes attributes{}; + if (cudaSuccess != cudaPointerGetAttributes(&attributes, ptr)) { return false; } +#if CUDART_VERSION < 10000 // memoryType is deprecated in CUDA 10 + return attributes.memoryType == cudaMemoryTypeDevice; +#else + return (attributes.type == cudaMemoryTypeDevice) or (attributes.type == cudaMemoryTypeManaged); +#endif +} + +enum size_in_bytes : size_t {}; + +constexpr auto default_num_allocations{100}; +constexpr size_in_bytes default_max_size{5_MiB}; + +struct allocation { + void* ptr{nullptr}; + std::size_t size{0}; + allocation(void* ptr, std::size_t size) : ptr{ptr}, size{size} {} + allocation() = default; +}; + +// Various test functions, shared between single-threaded and multithreaded tests. + +inline void test_get_current_device_resource() +{ + EXPECT_NE(nullptr, rmm::mr::get_current_device_resource()); + void* ptr = rmm::mr::get_current_device_resource()->allocate(1_MiB); + EXPECT_NE(nullptr, ptr); + EXPECT_TRUE(rmm::detail::is_pointer_aligned(ptr)); + EXPECT_TRUE(is_device_memory(ptr)); + rmm::mr::get_current_device_resource()->deallocate(ptr, 1_MiB); +} + +inline void test_allocate(rmm::mr::device_memory_resource* mr, + std::size_t bytes, + cuda_stream_view stream = {}) +{ + void* ptr = mr->allocate(bytes); + if (not stream.is_default()) { stream.synchronize(); } + EXPECT_NE(nullptr, ptr); + EXPECT_TRUE(rmm::detail::is_pointer_aligned(ptr)); + EXPECT_TRUE(is_device_memory(ptr)); + mr->deallocate(ptr, bytes); + if (not stream.is_default()) { stream.synchronize(); } +} + +// Simple reproducer for https://github.com/rapidsai/rmm/issues/861 +inline void concurrent_allocations_are_different(rmm::mr::device_memory_resource* mr, + cuda_stream_view stream) +{ + const auto size{8_B}; + void* ptr1 = mr->allocate(size, stream); + void* ptr2 = mr->allocate(size, stream); + + EXPECT_NE(ptr1, ptr2); + + mr->deallocate(ptr1, size, stream); + mr->deallocate(ptr2, size, stream); +} + +inline void test_various_allocations(rmm::mr::device_memory_resource* mr, cuda_stream_view stream) +{ + // test allocating zero bytes on non-default stream + { + void* ptr = mr->allocate(0, stream); + stream.synchronize(); + EXPECT_NO_THROW(mr->deallocate(ptr, 0, stream)); + stream.synchronize(); + } + + test_allocate(mr, 4_B, stream); + test_allocate(mr, 1_KiB, stream); + test_allocate(mr, 1_MiB, stream); + test_allocate(mr, 1_GiB, stream); + + // should fail to allocate too much + { + void* ptr{nullptr}; + EXPECT_THROW(ptr = mr->allocate(1_PiB, stream), rmm::out_of_memory); + EXPECT_EQ(nullptr, ptr); + + // test e.what(); + try { + ptr = mr->allocate(1_PiB, stream); + } catch (rmm::out_of_memory const& e) { + EXPECT_NE(std::string{e.what()}.find("out_of_memory"), std::string::npos); + } + } +} + +inline void test_random_allocations(rmm::mr::device_memory_resource* mr, + std::size_t num_allocations = default_num_allocations, + size_in_bytes max_size = default_max_size, + cuda_stream_view stream = {}) +{ + std::vector allocations(num_allocations); + + std::default_random_engine generator; + std::uniform_int_distribution distribution(1, max_size); + + // num_allocations allocations from [0,max_size) + std::for_each(allocations.begin(), + allocations.end(), + [&generator, &distribution, stream, mr](allocation& alloc) { + alloc.size = distribution(generator); + EXPECT_NO_THROW(alloc.ptr = mr->allocate(alloc.size, stream)); + if (not stream.is_default()) { stream.synchronize(); } + EXPECT_NE(nullptr, alloc.ptr); + EXPECT_TRUE(rmm::detail::is_pointer_aligned(alloc.ptr)); + }); + + std::for_each(allocations.begin(), allocations.end(), [stream, mr](allocation& alloc) { + EXPECT_NO_THROW(mr->deallocate(alloc.ptr, alloc.size, stream)); + if (not stream.is_default()) { stream.synchronize(); } + }); +} + +inline void test_mixed_random_allocation_free(rmm::mr::device_memory_resource* mr, + size_in_bytes max_size = default_max_size, + cuda_stream_view stream = {}) +{ + std::default_random_engine generator; + constexpr std::size_t num_allocations{100}; + + std::uniform_int_distribution size_distribution(1, max_size); + + constexpr int allocation_probability{53}; // percent + constexpr int max_probability{99}; + std::uniform_int_distribution op_distribution(0, max_probability); + std::uniform_int_distribution index_distribution(0, num_allocations - 1); + + std::size_t active_allocations{0}; + std::size_t allocation_count{0}; + + std::vector allocations; + + for (std::size_t i = 0; i < num_allocations * 2; ++i) { + bool do_alloc = true; + if (active_allocations > 0) { + int chance = op_distribution(generator); + do_alloc = (chance < allocation_probability) && (allocation_count < num_allocations); + } + + if (do_alloc) { + std::size_t size = size_distribution(generator); + active_allocations++; + allocation_count++; + EXPECT_NO_THROW(allocations.emplace_back(mr->allocate(size, stream), size)); + auto new_allocation = allocations.back(); + EXPECT_NE(nullptr, new_allocation.ptr); + EXPECT_TRUE(rmm::detail::is_pointer_aligned(new_allocation.ptr)); + } else { + auto const index = static_cast(index_distribution(generator) % active_allocations); + active_allocations--; + allocation to_free = allocations[index]; + allocations.erase(std::next(allocations.begin(), index)); + EXPECT_NO_THROW(mr->deallocate(to_free.ptr, to_free.size, stream)); + } + } + + EXPECT_EQ(active_allocations, 0); + EXPECT_EQ(allocations.size(), active_allocations); +} + +using MRFactoryFunc = std::function()>; + +/// Encapsulates a `device_memory_resource` factory function and associated name +struct mr_factory { + mr_factory(std::string name, MRFactoryFunc factory) + : name{std::move(name)}, factory{std::move(factory)} + { + } + + std::string name; ///< Name to associate with tests that use this factory + MRFactoryFunc factory; ///< Factory function that returns shared_ptr to `device_memory_resource` + ///< instance to use in test +}; + +/// Test fixture class value-parameterized on different `mr_factory`s +struct mr_test : public ::testing::TestWithParam { + void SetUp() override + { + auto factory = GetParam().factory; + mr = factory(); + if (mr == nullptr) { + GTEST_SKIP() << "Skipping tests since the memory resource is not supported with this CUDA " + << "driver/runtime version"; + } + ref = cuda::mr::resource_ref{*mr}; + } + + std::shared_ptr mr; ///< Pointer to resource to use in tests + cuda::mr::resource_ref ref{*mr}; + rmm::cuda_stream stream{}; +}; + +struct mr_allocation_test : public mr_test { +}; + +/// MR factory functions +inline auto make_cuda() { return std::make_shared(); } + +inline auto make_cuda_async() +{ + if (rmm::detail::async_alloc::is_supported()) { + return std::make_shared(); + } + return std::shared_ptr{nullptr}; +} + +inline auto make_managed() { return std::make_shared(); } + +inline auto make_pool() +{ + return rmm::mr::make_owning_wrapper(make_cuda()); +} + +inline auto make_arena() +{ + return rmm::mr::make_owning_wrapper(make_cuda()); +} + +inline auto make_fixed_size() +{ + return rmm::mr::make_owning_wrapper(make_cuda()); +} + +inline auto make_binning() +{ + auto pool = make_pool(); + // Add a binning_memory_resource with fixed-size bins of sizes 256, 512, 1024, 2048 and 4096KiB + // Larger allocations will use the pool resource + auto const bin_range_start{18}; + auto const bin_range_end{22}; + + auto mr = rmm::mr::make_owning_wrapper( + pool, bin_range_start, bin_range_end); + return mr; +} + +} // namespace rmm::test diff --git a/tests/mr/device/mr_ref_tests.cpp b/tests/mr/device/mr_ref_tests.cpp new file mode 100644 index 000000000..fa14eae28 --- /dev/null +++ b/tests/mr/device/mr_ref_tests.cpp @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mr_ref_test.hpp" + +#include + +#include + +#include + +using resource_ref = cuda::mr::resource_ref; + +namespace rmm::test { +namespace { + +INSTANTIATE_TEST_SUITE_P(ResourceTests, + mr_test, + ::testing::Values(mr_factory{"CUDA", &make_cuda}, +#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT + mr_factory{"CUDA_Async", &make_cuda_async}, +#endif + mr_factory{"Managed", &make_managed}, + mr_factory{"Pool", &make_pool}, + mr_factory{"Arena", &make_arena}, + mr_factory{"Binning", &make_binning}, + mr_factory{"Fixed_Size", &make_fixed_size}), + [](auto const& info) { return info.param.name; }); + +// Leave out fixed-size MR here because it can't handle the dynamic allocation sizes +INSTANTIATE_TEST_SUITE_P(ResourceAllocationTests, + mr_allocation_test, + ::testing::Values(mr_factory{"CUDA", &make_cuda}, +#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT + mr_factory{"CUDA_Async", &make_cuda_async}, +#endif + mr_factory{"Managed", &make_managed}, + mr_factory{"Pool", &make_pool}, + mr_factory{"Arena", &make_arena}, + mr_factory{"Binning", &make_binning}), + [](auto const& info) { return info.param.name; }); + +TEST(DefaultTest, CurrentDeviceResourceIsCUDA) +{ + EXPECT_NE(nullptr, rmm::mr::get_current_device_resource()); + EXPECT_TRUE(rmm::mr::get_current_device_resource()->is_equal(rmm::mr::cuda_memory_resource{})); +} + +TEST(DefaultTest, UseCurrentDeviceResource) { test_get_current_device_resource(); } + +TEST(DefaultTest, GetCurrentDeviceResource) +{ + resource_ref mr{*rmm::mr::get_current_device_resource()}; + rmm::mr::cuda_memory_resource default_resource{}; + resource_ref default_ref{default_resource}; + EXPECT_TRUE(mr == default_ref); +} + +TEST_P(mr_test, SetCurrentDeviceResource) +{ + rmm::mr::cuda_memory_resource default_resource{}; + resource_ref default_ref{default_resource}; + + resource_ref old_ref{*rmm::mr::set_current_device_resource(this->mr.get())}; + + // old mr should equal a cuda mr + EXPECT_TRUE(old_ref == default_ref); + + // current dev resource should equal this resource + EXPECT_TRUE(ref == this->ref); + + test_get_current_device_resource(); + + // setting to `nullptr` should reset to initial cuda resource + rmm::mr::set_current_device_resource(nullptr); + resource_ref new_ref{*rmm::mr::get_current_device_resource()}; + EXPECT_TRUE(new_ref == default_ref); +} + +TEST_P(mr_test, SelfEquality) { EXPECT_TRUE(this->mr->is_equal(*this->mr)); } + +TEST_P(mr_test, SupportsStreams) +{ + if (this->mr->is_equal(rmm::mr::cuda_memory_resource{}) || + this->mr->is_equal(rmm::mr::managed_memory_resource{})) { + EXPECT_FALSE(this->mr->supports_streams()); + } else { + EXPECT_TRUE(this->mr->supports_streams()); + } +} + +TEST_P(mr_test, GetMemInfo) +{ + if (this->mr->supports_get_mem_info()) { + const auto allocation_size{16 * 256}; + { + auto const [free, total] = this->mr->get_mem_info(rmm::cuda_stream_view{}); + EXPECT_TRUE(free >= allocation_size); + } + + void* ptr{nullptr}; + ptr = this->mr->allocate(allocation_size); + + { + auto const [free, total] = this->mr->get_mem_info(rmm::cuda_stream_view{}); + EXPECT_TRUE(free >= allocation_size); + } + + this->mr->deallocate(ptr, allocation_size); + } else { + auto const [free, total] = this->mr->get_mem_info(rmm::cuda_stream_view{}); + EXPECT_EQ(free, 0); + EXPECT_EQ(total, 0); + } +} + +// Simple reproducer for https://github.com/rapidsai/rmm/issues/861 +TEST_P(mr_test, AllocationsAreDifferentDefaultStream) +{ + concurrent_allocations_are_different(this->mr.get(), cuda_stream_view{}); +} + +TEST_P(mr_test, AllocationsAreDifferent) +{ + concurrent_allocations_are_different(this->mr.get(), this->stream); +} + +TEST_P(mr_allocation_test, AllocateDefaultStream) +{ + test_various_allocations(this->mr.get(), cuda_stream_view{}); +} + +TEST_P(mr_allocation_test, AllocateOnStream) +{ + test_various_allocations(this->mr.get(), this->stream); +} + +TEST_P(mr_allocation_test, RandomAllocations) { test_random_allocations(this->mr.get()); } + +TEST_P(mr_allocation_test, RandomAllocationsStream) +{ + test_random_allocations(this->mr.get(), default_num_allocations, default_max_size, this->stream); +} + +TEST_P(mr_allocation_test, MixedRandomAllocationFree) +{ + test_mixed_random_allocation_free(this->mr.get(), default_max_size, cuda_stream_view{}); +} + +TEST_P(mr_allocation_test, MixedRandomAllocationFreeStream) +{ + test_mixed_random_allocation_free(this->mr.get(), default_max_size, this->stream); +} + +} // namespace +} // namespace rmm::test diff --git a/tests/mr/host/mr_ref_tests.cpp b/tests/mr/host/mr_ref_tests.cpp new file mode 100644 index 000000000..eb84fd697 --- /dev/null +++ b/tests/mr/host/mr_ref_tests.cpp @@ -0,0 +1,262 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../../byte_literals.hpp" + +#include +#include +#include +#include + +#include + +#include + +#include + +#include +#include +#include + +namespace rmm::test { +namespace { +inline bool is_aligned(void* ptr, std::size_t alignment = alignof(std::max_align_t)) +{ + return rmm::detail::is_pointer_aligned(ptr, alignment); +} + +// Returns true if a pointer points to a device memory or managed memory allocation. +inline bool is_device_memory(void* ptr) +{ + cudaPointerAttributes attributes{}; + if (cudaSuccess != cudaPointerGetAttributes(&attributes, ptr)) { return false; } +#if CUDART_VERSION < 10000 // memoryType is deprecated in CUDA 10 + return attributes.memoryType == cudaMemoryTypeDevice; +#else + return (attributes.type == cudaMemoryTypeDevice) or (attributes.type == cudaMemoryTypeManaged); +#endif +} + +/** + * @brief Returns if a pointer `p` points to pinned host memory. + */ +inline bool is_pinned_memory(void* ptr) +{ + cudaPointerAttributes attributes{}; + if (cudaSuccess != cudaPointerGetAttributes(&attributes, ptr)) { return false; } + return attributes.type == cudaMemoryTypeHost; +} + +constexpr std::size_t size_word{4_B}; +constexpr std::size_t size_kb{1_KiB}; +constexpr std::size_t size_mb{1_MiB}; +constexpr std::size_t size_gb{1_GiB}; +constexpr std::size_t size_pb{1_PiB}; + +struct allocation { + void* ptr{nullptr}; + std::size_t size{0}; + allocation(void* ptr, std::size_t size) : ptr{ptr}, size{size} {} + allocation() = default; +}; +} // namespace + +template +struct MRTest : public ::testing::Test { + MemoryResourceType mr; + cuda::mr::resource_ref ref; + + MRTest() : mr{}, ref{mr} {} +}; + +using resources = ::testing::Types; +static_assert(cuda::mr::resource_with, ""); +static_assert(cuda::mr::resource_with, ""); + +TYPED_TEST_CASE(MRTest, resources); + +TYPED_TEST(MRTest, SelfEquality) { EXPECT_TRUE(this->ref == this->ref); } + +TYPED_TEST(MRTest, AllocateZeroBytes) +{ + void* ptr{nullptr}; + EXPECT_NO_THROW(ptr = this->ref.allocate(0)); + EXPECT_NO_THROW(this->ref.deallocate(ptr, 0)); +} + +TYPED_TEST(MRTest, AllocateWord) +{ + void* ptr{nullptr}; + EXPECT_NO_THROW(ptr = this->ref.allocate(size_word)); + EXPECT_NE(nullptr, ptr); + EXPECT_TRUE(is_aligned(ptr)); + EXPECT_FALSE(is_device_memory(ptr)); + EXPECT_NO_THROW(this->ref.deallocate(ptr, size_word)); +} + +TYPED_TEST(MRTest, AllocateKB) +{ + void* ptr{nullptr}; + EXPECT_NO_THROW(ptr = this->ref.allocate(size_kb)); + EXPECT_NE(nullptr, ptr); + EXPECT_TRUE(is_aligned(ptr)); + EXPECT_FALSE(is_device_memory(ptr)); + EXPECT_NO_THROW(this->ref.deallocate(ptr, size_kb)); +} + +TYPED_TEST(MRTest, AllocateMB) +{ + void* ptr{nullptr}; + EXPECT_NO_THROW(ptr = this->ref.allocate(size_mb)); + EXPECT_NE(nullptr, ptr); + EXPECT_TRUE(is_aligned(ptr)); + EXPECT_FALSE(is_device_memory(ptr)); + EXPECT_NO_THROW(this->ref.deallocate(ptr, size_mb)); +} + +TYPED_TEST(MRTest, AllocateGB) +{ + void* ptr{nullptr}; + EXPECT_NO_THROW(ptr = this->ref.allocate(size_gb)); + EXPECT_NE(nullptr, ptr); + EXPECT_TRUE(is_aligned(ptr)); + EXPECT_FALSE(is_device_memory(ptr)); + EXPECT_NO_THROW(this->ref.deallocate(ptr, size_gb)); +} + +TYPED_TEST(MRTest, AllocateTooMuch) +{ + void* ptr{nullptr}; + EXPECT_THROW(ptr = this->ref.allocate(size_pb), std::bad_alloc); + EXPECT_EQ(nullptr, ptr); +} + +TYPED_TEST(MRTest, RandomAllocations) +{ + constexpr std::size_t num_allocations{100}; + std::vector allocations(num_allocations); + + constexpr std::size_t MAX_ALLOCATION_SIZE{5 * size_mb}; + + std::default_random_engine generator; + std::uniform_int_distribution distribution(1, MAX_ALLOCATION_SIZE); + + // 100 allocations from [0,5MB) + std::for_each( + allocations.begin(), allocations.end(), [&generator, &distribution, this](allocation& alloc) { + alloc.size = distribution(generator); + EXPECT_NO_THROW(alloc.ptr = this->ref.allocate(alloc.size)); + EXPECT_NE(nullptr, alloc.ptr); + EXPECT_TRUE(is_aligned(alloc.ptr)); + }); + + std::for_each(allocations.begin(), allocations.end(), [this](allocation& alloc) { + EXPECT_NO_THROW(this->ref.deallocate(alloc.ptr, alloc.size)); + }); +} + +TYPED_TEST(MRTest, MixedRandomAllocationFree) +{ + std::default_random_engine generator; + + constexpr std::size_t MAX_ALLOCATION_SIZE{10 * size_mb}; + std::uniform_int_distribution size_distribution(1, MAX_ALLOCATION_SIZE); + + // How often a free will occur. For example, if `1`, then every allocation + // will immediately be free'd. Or, if 4, on average, a free will occur after + // every 4th allocation + constexpr std::size_t FREE_FREQUENCY{4}; + std::uniform_int_distribution free_distribution(1, FREE_FREQUENCY); + + std::deque allocations; + + constexpr std::size_t num_allocations{100}; + for (std::size_t i = 0; i < num_allocations; ++i) { + std::size_t allocation_size = size_distribution(generator); + EXPECT_NO_THROW(allocations.emplace_back(this->ref.allocate(allocation_size), allocation_size)); + auto new_allocation = allocations.back(); + EXPECT_NE(nullptr, new_allocation.ptr); + EXPECT_TRUE(is_aligned(new_allocation.ptr)); + + bool const free_front{free_distribution(generator) == free_distribution.max()}; + + if (free_front) { + auto front = allocations.front(); + EXPECT_NO_THROW(this->ref.deallocate(front.ptr, front.size)); + allocations.pop_front(); + } + } + // free any remaining allocations + for (auto alloc : allocations) { + EXPECT_NO_THROW(this->ref.deallocate(alloc.ptr, alloc.size)); + allocations.pop_front(); + } +} + +static constexpr std::size_t MinTestedAlignment{16}; +static constexpr std::size_t MaxTestedAlignment{4096}; +static constexpr std::size_t TestedAlignmentMultiplier{2}; +static constexpr std::size_t NUM_TRIALS{100}; + +TYPED_TEST(MRTest, AlignmentTest) +{ + std::default_random_engine generator(0); + constexpr std::size_t MAX_ALLOCATION_SIZE{10 * size_mb}; + std::uniform_int_distribution size_distribution(1, MAX_ALLOCATION_SIZE); + + for (std::size_t num_trials = 0; num_trials < NUM_TRIALS; ++num_trials) { + for (std::size_t alignment = MinTestedAlignment; alignment <= MaxTestedAlignment; + alignment *= TestedAlignmentMultiplier) { + auto allocation_size = size_distribution(generator); + void* ptr{nullptr}; + EXPECT_NO_THROW(ptr = this->ref.allocate(allocation_size, alignment)); + EXPECT_TRUE(is_aligned(ptr, alignment)); + EXPECT_NO_THROW(this->ref.deallocate(ptr, allocation_size, alignment)); + } + } +} + +TYPED_TEST(MRTest, UnsupportedAlignmentTest) +{ + std::default_random_engine generator(0); + constexpr std::size_t MAX_ALLOCATION_SIZE{10 * size_mb}; + std::uniform_int_distribution size_distribution(1, MAX_ALLOCATION_SIZE); + + for (std::size_t num_trials = 0; num_trials < NUM_TRIALS; ++num_trials) { + for (std::size_t alignment = MinTestedAlignment; alignment <= MaxTestedAlignment; + alignment *= TestedAlignmentMultiplier) { + auto allocation_size = size_distribution(generator); + void* ptr{nullptr}; + // An unsupported alignment (like an odd number) should result in an + // alignment of `alignof(std::max_align_t)` + auto const bad_alignment = alignment + 1; + EXPECT_NO_THROW(ptr = this->ref.allocate(allocation_size, bad_alignment)); + EXPECT_TRUE(is_aligned(ptr, alignof(std::max_align_t))); + EXPECT_NO_THROW(this->ref.deallocate(ptr, allocation_size, bad_alignment)); + } + } +} + +TEST(PinnedResource, isPinned) +{ + rmm::mr::pinned_memory_resource mr; + cuda::mr::resource_ref ref{mr}; + void* ptr{nullptr}; + EXPECT_NO_THROW(ptr = ref.allocate(100)); + EXPECT_TRUE(is_pinned_memory(ptr)); + EXPECT_NO_THROW(ref.deallocate(ptr, 100)); +} +} // namespace rmm::test diff --git a/tests/mr/host/mr_tests.cpp b/tests/mr/host/mr_tests.cpp index 1cd59f5a6..cc485f95e 100644 --- a/tests/mr/host/mr_tests.cpp +++ b/tests/mr/host/mr_tests.cpp @@ -23,6 +23,8 @@ #include +#include + #include #include @@ -80,6 +82,8 @@ struct MRTest : public ::testing::Test { }; using resources = ::testing::Types; +static_assert(cuda::mr::resource_with, ""); +static_assert(cuda::mr::resource_with, ""); TYPED_TEST_CASE(MRTest, resources);