
Require explicit pool size in pool_memory_resource and move some things out of detail namespace #1417

Merged
31 commits
c43a8c1
Add new util to get a fraction of available device mem, move availabl…
harrism Dec 19, 2023
d238daa
Deprecate old pool_mr ctors (optional initial size) and add new ctors…
harrism Dec 19, 2023
3d65d4c
Update all tests and resources to use new pool ctors and util
harrism Dec 19, 2023
66d85b4
Rename fraction_of_free_device_memory to percent_of_free_device_memory
harrism Dec 20, 2023
265de9b
clang-tidy Ignore 50 and 100 magic numbers
harrism Dec 20, 2023
0be364b
Remove straggler includes of removed file.
harrism Dec 20, 2023
266afa9
Merge branch 'branch-24.02' into fea-explicit-initial-pool-size
harrism Dec 20, 2023
5d66f40
Another missed include.
harrism Dec 20, 2023
fae5b73
Add detail::available_device_memory back as an alias of rmm::availabl…
harrism Jan 9, 2024
92c0653
merge branch 24.02
harrism Jan 9, 2024
2acf759
copyright
harrism Jan 9, 2024
782ff55
document (and deprecate) available_device_memory alias
harrism Jan 9, 2024
0b4c968
Respond to feedback from @wence-
harrism Jan 9, 2024
4f91478
Include doxygen deprecated output in docs
wence- Jan 9, 2024
f581809
Minor docstring fixes
wence- Jan 9, 2024
bafd70a
Don't use zero for default size in test.
harrism Jan 10, 2024
a77d215
Add non-detail alignment utilities
harrism Jan 10, 2024
07dffa3
Duplicate (for now) alignment utilities in rmm:: namespace since outs…
harrism Jan 10, 2024
8afff2d
Don't deprecate anything just yet (until cuDF/cuGraph updated)
harrism Jan 10, 2024
0140bd4
Merge branch 'fea-explicit-initial-pool-size' of github.com:harrism/r…
harrism Jan 10, 2024
91752c8
Make percent_of_free_device_memory do what it says on the tin.
harrism Jan 10, 2024
baf429c
Fix remaining uses of pool ctor in docs and code
harrism Jan 10, 2024
c90e81c
Fix overflow in percent_of_free_device_memory
harrism Jan 10, 2024
c2843be
Fix Cython to provide explicit initial size
harrism Jan 10, 2024
6e0aeaa
Respond to review suggestions in aligned.hpp
harrism Jan 10, 2024
c3c61e1
Fix quoted auto includes
harrism Jan 10, 2024
014ac5b
missed file for detail changes
harrism Jan 10, 2024
909b733
Add utilities doxygen group
harrism Jan 11, 2024
0fc3fba
Add utilities to sphinx docs
harrism Jan 11, 2024
6f9b0bd
Minimal changes to squash doc build warnings
wence- Jan 11, 2024
4ae13fc
docs: Fix custom handler for missing references
wence- Jan 11, 2024
4 changes: 2 additions & 2 deletions .clang-tidy
@@ -62,8 +62,8 @@ CheckOptions:
value: 'alignment'
- key: cppcoreguidelines-avoid-magic-numbers.IgnorePowersOf2IntegerValues
value: '1'
- key: readability-magic-numbers.IgnorePowersOf2IntegerValues
value: '1'
- key: cppcoreguidelines-avoid-magic-numbers.IgnoredIntegerValues
value: "0;1;2;3;4;50;100"
- key: cppcoreguidelines-avoid-do-while.IgnoreMacros
value: 'true'
...
10 changes: 7 additions & 3 deletions README.md
@@ -332,7 +332,9 @@ Accessing and modifying the default resource is done through two functions:
```c++
rmm::mr::cuda_memory_resource cuda_mr;
// Construct a resource that uses a coalescing best-fit pool allocator
rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> pool_mr{&cuda_mr};
// With the pool initially half of available device memory
auto initial_size = rmm::percent_of_free_device_memory(50);
rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> pool_mr{&cuda_mr, initial_size};
rmm::mr::set_current_device_resource(&pool_mr); // Updates the current device resource pointer to `pool_mr`
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(); // Points to `pool_mr`
```
@@ -351,11 +353,13 @@ per-device resources. Here is an example loop that creates `unique_ptr`s to `poo
objects for each device and sets them as the per-device resource for that device.

```c++
std::vector<unique_ptr<pool_memory_resource>> per_device_pools;
using pool_mr = rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource>;
std::vector<unique_ptr<pool_mr>> per_device_pools;
for(int i = 0; i < N; ++i) {
cudaSetDevice(i); // set device i before creating MR
// Use a vector of unique_ptr to maintain the lifetime of the MRs
per_device_pools.push_back(std::make_unique<pool_memory_resource>());
// Note: for brevity, omitting creation of upstream and computing initial_size
per_device_pools.push_back(std::make_unique<pool_mr>(upstream, initial_size));
// Set the per-device resource for device i
set_per_device_resource(cuda_device_id{i}, &per_device_pools.back());
}
9 changes: 6 additions & 3 deletions benchmarks/device_uvector/device_uvector_bench.cu
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -16,6 +16,7 @@

#include "../synchronization/synchronization.hpp"

#include <rmm/cuda_device.hpp>
#include <rmm/cuda_stream.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/device_uvector.hpp>
@@ -38,7 +39,8 @@
void BM_UvectorSizeConstruction(benchmark::State& state)
{
rmm::mr::cuda_memory_resource cuda_mr{};
rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> mr{&cuda_mr};
rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> mr{
&cuda_mr, rmm::percent_of_free_device_memory(50)};
rmm::mr::set_current_device_resource(&mr);

for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores)
@@ -59,7 +61,8 @@ BENCHMARK(BM_UvectorSizeConstruction)
void BM_ThrustVectorSizeConstruction(benchmark::State& state)
{
rmm::mr::cuda_memory_resource cuda_mr{};
rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> mr{&cuda_mr};
rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> mr{
&cuda_mr, rmm::percent_of_free_device_memory(50)};
rmm::mr::set_current_device_resource(&mr);

for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores)
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -16,6 +16,7 @@

#include <benchmarks/utilities/cxxopts.hpp>

#include <rmm/cuda_device.hpp>
#include <rmm/cuda_stream.hpp>
#include <rmm/cuda_stream_pool.hpp>
#include <rmm/device_uvector.hpp>
@@ -100,7 +101,8 @@ inline auto make_cuda_async() { return std::make_shared<rmm::mr::cuda_async_memo

inline auto make_pool()
{
return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(make_cuda());
return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(
make_cuda(), rmm::percent_of_free_device_memory(50));
}

inline auto make_arena()
8 changes: 5 additions & 3 deletions benchmarks/random_allocations/random_allocations.cpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -16,6 +16,7 @@

#include <benchmarks/utilities/cxxopts.hpp>

#include <rmm/cuda_device.hpp>
#include <rmm/mr/device/arena_memory_resource.hpp>
#include <rmm/mr/device/binning_memory_resource.hpp>
#include <rmm/mr/device/cuda_async_memory_resource.hpp>
@@ -165,12 +166,13 @@ inline auto make_cuda_async() { return std::make_shared<rmm::mr::cuda_async_memo

inline auto make_pool()
{
return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(make_cuda());
return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(
make_cuda(), rmm::percent_of_free_device_memory(50));
}

inline auto make_arena()
{
auto free = rmm::detail::available_device_memory().first;
auto free = rmm::available_device_memory().first;
constexpr auto reserve{64UL << 20}; // Leave some space for CUDA overhead.
return rmm::mr::make_owning_wrapper<rmm::mr::arena_memory_resource>(make_cuda(), free - reserve);
}
4 changes: 2 additions & 2 deletions benchmarks/replay/replay.cpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -61,7 +61,7 @@ inline auto make_pool(std::size_t simulated_size)
return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(
make_simulated(simulated_size), simulated_size, simulated_size);
}
return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(make_cuda());
return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(make_cuda(), 0);
}

inline auto make_arena(std::size_t simulated_size)
2 changes: 1 addition & 1 deletion doxygen/Doxyfile
@@ -504,7 +504,7 @@ EXTRACT_PACKAGE = NO
# included in the documentation.
# The default value is: NO.

EXTRACT_STATIC = NO
EXTRACT_STATIC = YES

# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined
# locally in source files will be included in the documentation. If set to NO,
3 changes: 2 additions & 1 deletion include/doxygen_groups.h
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -41,4 +41,5 @@
* @defgroup errors Errors
* @defgroup logging Logging
* @defgroup thrust_integrations Thrust Integrations
* @defgroup utilities Utilities
*/
119 changes: 119 additions & 0 deletions include/rmm/aligned.hpp
@@ -0,0 +1,119 @@
/*
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cassert>
#include <cstddef>
#include <cstdint>

namespace rmm {

/**
* @addtogroup utilities
* @{
* @file
*/

/**
* @brief Default alignment used for host memory allocated by RMM.
*
*/
static constexpr std::size_t RMM_DEFAULT_HOST_ALIGNMENT{alignof(std::max_align_t)};

/**
* @brief Default alignment used for CUDA memory allocation.
*
*/
static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256};

/**
* @brief Returns whether or not `value` is a power of 2.
*
* @param[in] value to check.
*
 * @return Whether the input is a power of two with non-negative exponent
*/
constexpr bool is_pow2(std::size_t value) { return (value != 0U) && ((value & (value - 1)) == 0U); }

/**
* @brief Returns whether or not `alignment` is a valid memory alignment.
*
* @param[in] alignment to check
*
* @return Whether the alignment is valid
*/
constexpr bool is_supported_alignment(std::size_t alignment) { return is_pow2(alignment); }

/**
* @brief Align up to nearest multiple of specified power of 2
*
* @param[in] value value to align
* @param[in] alignment amount, in bytes, must be a power of 2
*
* @return Return the aligned value, as one would expect
*/
constexpr std::size_t align_up(std::size_t value, std::size_t alignment) noexcept
{
assert(is_supported_alignment(alignment));
return (value + (alignment - 1)) & ~(alignment - 1);
}

/**
* @brief Align down to the nearest multiple of specified power of 2
*
* @param[in] value value to align
* @param[in] alignment amount, in bytes, must be a power of 2
*
* @return Return the aligned value, as one would expect
*/
constexpr std::size_t align_down(std::size_t value, std::size_t alignment) noexcept
{
assert(is_supported_alignment(alignment));
return value & ~(alignment - 1);
}

/**
* @brief Checks whether a value is aligned to a multiple of a specified power of 2
*
* @param[in] value value to check for alignment
* @param[in] alignment amount, in bytes, must be a power of 2
*
* @return true if aligned
*/
constexpr bool is_aligned(std::size_t value, std::size_t alignment) noexcept
{
assert(is_supported_alignment(alignment));
return value == align_down(value, alignment);
}

/**
* @brief Checks whether the provided pointer is aligned to a specified @p alignment
*
* @param[in] ptr pointer to check for alignment
* @param[in] alignment required alignment in bytes, must be a power of 2
*
* @return true if the pointer is aligned
*/
inline bool is_pointer_aligned(void* ptr, std::size_t alignment = CUDA_ALLOCATION_ALIGNMENT)
{
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
return is_aligned(reinterpret_cast<std::uintptr_t>(ptr), alignment);
}

/** @} */ // end of group

} // namespace rmm
46 changes: 45 additions & 1 deletion include/rmm/cuda_device.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -15,6 +15,7 @@
*/
#pragma once

#include <rmm/aligned.hpp>
#include <rmm/detail/error.hpp>

#include <cuda_runtime_api.h>
Expand Down Expand Up @@ -102,6 +103,49 @@ inline int get_num_cuda_devices()
return num_dev;
}

/**
* @brief Returns the available and total device memory in bytes for the current device
*
* @return The available and total device memory in bytes for the current device as a std::pair.
*/
inline std::pair<std::size_t, std::size_t> available_device_memory()
{
std::size_t free{};
std::size_t total{};
RMM_CUDA_TRY(cudaMemGetInfo(&free, &total));
return {free, total};
}

namespace detail {

/**
* @brief Returns the available and total device memory in bytes for the current device
*
* @deprecated Use rmm::available_device_memory() instead.
*
* @return The available and total device memory in bytes for the current device as a std::pair.
*/
//[[deprecated("Use `rmm::available_device_memory` instead.")]] //
const auto available_device_memory = rmm::available_device_memory;

} // namespace detail

/**
* @brief Returns the approximate specified percent of available device memory on the current CUDA
* device, aligned (down) to the nearest CUDA allocation size.
*
* @param percent The percent of free memory to return.
*
* @return The recommended initial device memory pool size in bytes.
*/
inline std::size_t percent_of_free_device_memory(int percent)
{
[[maybe_unused]] auto const [free, total] = rmm::available_device_memory();
auto fraction = static_cast<double>(percent) / 100.0;
return rmm::align_down(static_cast<std::size_t>(static_cast<double>(free) * fraction),
rmm::CUDA_ALLOCATION_ALIGNMENT);
}

/**
* @brief RAII class that sets the current CUDA device to the specified device on construction
* and restores the previous device on destruction.
Expand Down