[DO NOT MERGE] PoC for the new design of `cuda::mr::{async_}resource_ref`

This adds support for the new `cuda::mr::{async_}resource_ref` design to the current `host_memory_resource` and `device_memory_resource`.

It is not perfect yet and is meant as a first step to gain experience with the design and start early experimentation.
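To make the intended usage concrete, here is a minimal sketch (not part of this commit, and assuming the `cuda::mr::resource_ref` interface from the libcudacxx branch referenced below): a function that accepts any device-accessible resource through the new non-owning reference instead of templating on the concrete resource type.

#include <cuda/memory_resource>
#include <rmm/mr/device/device_memory_resource.hpp>

// `make_buffer` is a hypothetical helper: after this change, every class derived
// from rmm::mr::device_memory_resource advertises cuda::mr::device_accessible
// and therefore binds to this reference.
void* make_buffer(cuda::mr::resource_ref<cuda::mr::device_accessible> ref, std::size_t bytes)
{
  return ref.allocate(bytes, 256);  // bytes + alignment, per the new interface
}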
miscco committed Sep 30, 2022
1 parent d212232 commit 777d0f0
Showing 10 changed files with 966 additions and 7 deletions.
11 changes: 9 additions & 2 deletions CMakeLists.txt
@@ -58,13 +58,16 @@ rapids_find_package(
rapids_cpm_init()
include(cmake/thirdparty/get_spdlog.cmake)
include(cmake/thirdparty/get_thrust.cmake)
include(cmake/thirdparty/get_libcudacxx.cmake)

# library targets
add_library(rmm INTERFACE)
add_library(rmm::rmm ALIAS rmm)

target_include_directories(rmm INTERFACE "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
"$<INSTALL_INTERFACE:include>")
target_include_directories(
rmm
INTERFACE "$<BUILD_INTERFACE:${LIBCUDACXX_INCLUDE_DIR}>"
"$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>" "$<INSTALL_INTERFACE:include>")

if(CUDA_STATIC_RUNTIME)
message(STATUS "RMM: Enabling static linking of cudart")
@@ -109,6 +112,10 @@ include(CPack)
# install export targets
install(TARGETS rmm EXPORT rmm-exports)
install(DIRECTORY include/rmm/ DESTINATION include/rmm)
install(
DIRECTORY ${RMM_GENERATED_INCLUDE_DIR}/include/libcxx
${RMM_GENERATED_INCLUDE_DIR}/include/libcudacxx
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rmm)
install(FILES ${RMM_BINARY_DIR}/include/rmm/version_config.hpp DESTINATION include/rmm)

set(doc_string
33 changes: 33 additions & 0 deletions cmake/thirdparty/get_libcudacxx.cmake
@@ -0,0 +1,33 @@
# =============================================================================
# Copyright (c) 2020, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under
# the License.
# =============================================================================

# Use CPM to find or clone libcudacxx
function(find_and_configure_libcudacxx VERSION)
rapids_cpm_find(
libcudacxx ${VERSION}
GIT_REPOSITORY https://github.com/miscco/libcudacxx.git
GIT_TAG memory_resource # ${VERSION}
GIT_SHALLOW TRUE DOWNLOAD_ONLY TRUE)

set(LIBCUDACXX_INCLUDE_DIR
"${libcudacxx_SOURCE_DIR}/include"
PARENT_SCOPE)
set(LIBCXX_INCLUDE_DIR
"${libcudacxx_SOURCE_DIR}/libcxx/include"
PARENT_SCOPE)
endfunction()

set(RMM_MIN_VERSION_libcudacxx 1.5.0)

find_and_configure_libcudacxx(${RMM_MIN_VERSION_libcudacxx})
6 changes: 6 additions & 0 deletions include/rmm/cuda_stream_view.hpp
@@ -23,6 +23,7 @@
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <cuda/stream_ref>

namespace rmm {

@@ -60,6 +61,11 @@ class cuda_stream_view {
* @brief Implicit conversion to cudaStream_t.
*/
constexpr operator cudaStream_t() const noexcept { return value(); }

/**
* @brief Implicit conversion to stream_ref.
*/
constexpr operator cuda::stream_ref() const noexcept { return value(); }

/**
* @brief Return true if the wrapped stream is the CUDA per-thread default stream.
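As a usage note, the new conversion lets a `rmm::cuda_stream_view` be passed directly to any API that takes a `cuda::stream_ref`. A minimal sketch, assuming `cuda::stream_ref::wait()` from the referenced libcudacxx branch (`wait_on` is a hypothetical helper):

#include <cuda/stream_ref>
#include <rmm/cuda_stream_view.hpp>

// Blocks the calling thread until all work submitted to the stream completes.
void wait_on(cuda::stream_ref stream) { stream.wait(); }

void example()
{
  rmm::cuda_stream_view view{};  // wraps the default (null) stream
  wait_on(view);                 // implicit conversion to cuda::stream_ref
}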
171 changes: 166 additions & 5 deletions include/rmm/mr/device/device_memory_resource.hpp
@@ -18,10 +18,16 @@
#include <rmm/cuda_stream_view.hpp>
#include <rmm/detail/aligned.hpp>

#include <cuda/memory_resource>

#include <cstddef>
#include <utility>

namespace rmm::mr {
/**
* @brief Tag type that enables the allocate_async interface
*/
struct supports_allocate_async {};

/**
* @brief Base class for all libcudf device memory allocation.
@@ -81,11 +87,11 @@
*/
class device_memory_resource {
public:
device_memory_resource() = default;
virtual ~device_memory_resource() = default;
device_memory_resource(device_memory_resource const&) = default;
device_memory_resource& operator=(device_memory_resource const&) = default;
device_memory_resource(device_memory_resource&&) noexcept = default;
device_memory_resource& operator=(device_memory_resource&&) noexcept = default;

/**
@@ -149,6 +155,158 @@ class device_memory_resource {
return do_is_equal(other);
}

/**
* @brief Allocates memory of size at least \p bytes.
*
* The returned pointer will have at minimum 256 byte alignment.
*
* This overload always performs the allocation on the null stream.
*
* @throws `rmm::bad_alloc` When the requested `bytes` cannot be allocated.
*
* @param bytes The size of the allocation
* @param alignment The expected alignment of the allocation
* @return void* Pointer to the newly allocated memory
*/
void* allocate(std::size_t bytes, std::size_t alignment)
{
return do_allocate(rmm::detail::align_up(bytes, alignment), cuda_stream_view{});
}

/**
* @brief Deallocate memory pointed to by \p ptr.
*
* `ptr` must have been returned by a prior call to `allocate(bytes, alignment)` on
* a `device_memory_resource` that compares equal to `*this`, and the storage
* it points to must not yet have been deallocated, otherwise behavior is
* undefined.
*
* This overload always performs the deallocation on the null stream.
*
* @throws Nothing.
*
* @param ptr Pointer to be deallocated
* @param bytes The size in bytes of the allocation. This must be equal to the
* value of `bytes` that was passed to the `allocate` call that returned `ptr`.
* @param alignment The alignment that was passed to the `allocate` call that returned `ptr`
*/
void deallocate(void* ptr, std::size_t bytes, std::size_t alignment)
{
do_deallocate(ptr, rmm::detail::align_up(bytes, alignment), cuda_stream_view{});
}

/**
* @brief Allocates memory of size at least \p bytes.
*
* The returned pointer will have at minimum 256 byte alignment.
*
* The allocation is performed on the supplied stream. This overload is only
* available when the resource advertises the `supports_allocate_async` property.
*
* @throws `rmm::bad_alloc` When the requested `bytes` cannot be allocated on
* the specified `stream`.
*
* @param bytes The size of the allocation
* @param alignment The expected alignment of the allocation
* @param stream Stream on which to perform allocation
* @return void* Pointer to the newly allocated memory
*/
template <class Res = device_memory_resource,
std::enable_if_t<cuda::mr::has_property<Res, supports_allocate_async>, int> = 0>
void* allocate_async(std::size_t bytes, std::size_t alignment, cuda_stream_view stream)
{
return do_allocate(rmm::detail::align_up(bytes, alignment), stream);
}

/**
* @brief Allocates memory of size at least \p bytes.
*
* The returned pointer will have at minimum 256 byte alignment.
*
* The allocation is performed on the supplied stream. This overload is only
* available when the resource advertises the `supports_allocate_async` property.
*
* @throws `rmm::bad_alloc` When the requested `bytes` cannot be allocated on
* the specified `stream`.
*
* @param bytes The size of the allocation
* @param stream Stream on which to perform allocation
* @return void* Pointer to the newly allocated memory
*/
template <class Res = device_memory_resource,
std::enable_if_t<cuda::mr::has_property<Res, supports_allocate_async>, int> = 0>
void* allocate_async(std::size_t bytes, cuda_stream_view stream)
{
return do_allocate(rmm::detail::align_up(bytes, allocation_size_alignment), stream);
}

/**
* @brief Deallocate memory pointed to by \p ptr.
*
* `ptr` must have been returned by a prior call to an `allocate` or `allocate_async`
* overload on a `device_memory_resource` that compares equal to `*this`, and the
* storage it points to must not yet have been deallocated, otherwise behavior is
* undefined.
*
* The deallocation is performed on the supplied stream. This overload is only
* available when the resource advertises the `supports_allocate_async` property.
*
* @throws Nothing.
*
* @param ptr Pointer to be deallocated
* @param bytes The size in bytes of the allocation. This must be equal to the
* value of `bytes` that was passed to the allocate call that returned `ptr`.
* @param alignment The alignment that was passed to the allocate call that returned `ptr`
* @param stream Stream on which to perform the deallocation
*/
template <class Res = device_memory_resource,
std::enable_if_t<cuda::mr::has_property<Res, supports_allocate_async>, int> = 0>
void deallocate_async(void* ptr,
std::size_t bytes,
std::size_t alignment,
cuda_stream_view stream)
{
do_deallocate(ptr, rmm::detail::align_up(bytes, alignment), stream);
}

/**
* @brief Deallocate memory pointed to by \p ptr.
*
* `ptr` must have been returned by a prior call to an `allocate` or `allocate_async`
* overload on a `device_memory_resource` that compares equal to `*this`, and the
* storage it points to must not yet have been deallocated, otherwise behavior is
* undefined.
*
* The deallocation is performed on the supplied stream. This overload is only
* available when the resource advertises the `supports_allocate_async` property.
*
* @throws Nothing.
*
* @param ptr Pointer to be deallocated
* @param bytes The size in bytes of the allocation. This must be equal to the
* value of `bytes` that was passed to the allocate call that returned `ptr`.
* @param stream Stream on which to perform the deallocation
*/
template <class Res = device_memory_resource,
std::enable_if_t<cuda::mr::has_property<Res, supports_allocate_async>, int> = 0>
void deallocate_async(void* ptr, std::size_t bytes, cuda_stream_view stream)
{
do_deallocate(ptr, rmm::detail::align_up(bytes, allocation_size_alignment), stream);
}

[[nodiscard]] bool operator==(device_memory_resource const& other) const noexcept
{
return do_is_equal(other);
}

[[nodiscard]] bool operator!=(device_memory_resource const& other) const noexcept
{
return !do_is_equal(other);
}

/**
* @brief Query whether the resource supports use of non-null CUDA streams for
* allocation/deallocation.
@@ -177,6 +335,8 @@ class device_memory_resource {
return do_get_mem_info(stream);
}

friend void get_property(device_memory_resource const&, cuda::mr::device_accessible) noexcept {}

private:
// All allocations are padded to a multiple of allocation_size_alignment bytes.
static constexpr auto allocation_size_alignment = std::size_t{8};
@@ -238,4 +398,5 @@ class device_memory_resource {
[[nodiscard]] virtual std::pair<std::size_t, std::size_t> do_get_mem_info(
cuda_stream_view stream) const = 0;
};
static_assert(cuda::mr::resource_with<device_memory_resource, cuda::mr::device_accessible>, "");
} // namespace rmm::mr
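The `supports_allocate_async` tag introduced in this file gates the `allocate_async`/`deallocate_async` overloads through `cuda::mr::has_property`. A minimal sketch of how a resource type would advertise the property (`my_resource` is hypothetical, not part of this commit):

#include <cuda/memory_resource>
#include <rmm/mr/device/device_memory_resource.hpp>

struct my_resource {
  // A get_property friend taking the tag type is what cuda::mr::has_property
  // detects; the *_async members above are SFINAE-guarded on exactly this check.
  friend void get_property(my_resource const&, rmm::mr::supports_allocate_async) noexcept {}
};
static_assert(cuda::mr::has_property<my_resource, rmm::mr::supports_allocate_async>, "");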
16 changes: 16 additions & 0 deletions include/rmm/mr/host/host_memory_resource.hpp
@@ -15,6 +15,8 @@
*/
#pragma once

#include <cuda/memory_resource>

#include <cstddef>
#include <utility>

@@ -107,6 +109,18 @@ class host_memory_resource {
{
return do_is_equal(other);
}

[[nodiscard]] bool operator==(host_memory_resource const& other) const noexcept
{
return do_is_equal(other);
}

[[nodiscard]] bool operator!=(host_memory_resource const& other) const noexcept
{
return !do_is_equal(other);
}

friend void get_property(host_memory_resource const&, cuda::mr::host_accessible) noexcept {}

private:
/**
@@ -160,4 +174,6 @@
return this == &other;
}
};
static_assert(cuda::mr::resource_with<host_memory_resource, cuda::mr::host_accessible>, "");

} // namespace rmm::mr
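With the `get_property` friend and the closing `static_assert` in place, a concrete host resource can be type-erased behind a non-owning reference. A minimal sketch, assuming the `cuda::mr::resource_ref` constructor and allocate/deallocate signatures from the referenced libcudacxx branch:

#include <cuda/memory_resource>
#include <rmm/mr/host/new_delete_resource.hpp>

void example()
{
  rmm::mr::new_delete_resource upstream;  // an existing RMM host resource
  cuda::mr::resource_ref<cuda::mr::host_accessible> ref{upstream};

  void* ptr = ref.allocate(256, 16);  // forwards to upstream.allocate(256, 16)
  ref.deallocate(ptr, 256, 16);
}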
6 changes: 6 additions & 0 deletions tests/CMakeLists.txt
@@ -94,6 +94,9 @@ endfunction()
# device mr tests
ConfigureTest(DEVICE_MR_TEST mr/device/mr_tests.cpp mr/device/mr_multithreaded_tests.cpp)

# device mr_ref tests
ConfigureTest(DEVICE_MR_REF_TEST mr/device/mr_ref_tests.cpp)

# general adaptor tests
ConfigureTest(ADAPTOR_TEST mr/device/adaptor_tests.cpp)

@@ -130,6 +133,9 @@ ConfigureTest(LIMITING_TEST mr/device/limiting_mr_tests.cpp)
# host mr tests
ConfigureTest(HOST_MR_TEST mr/host/mr_tests.cpp)

# host mr_ref tests
ConfigureTest(HOST_MR_REF_TEST mr/host/mr_ref_tests.cpp)

# cuda stream tests
ConfigureTest(CUDA_STREAM_TEST cuda_stream_tests.cpp cuda_stream_pool_tests.cpp)
