diff --git a/.clang-tidy b/.clang-tidy index 9b3f844c9..70a0bea16 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -62,8 +62,8 @@ CheckOptions: value: 'alignment' - key: cppcoreguidelines-avoid-magic-numbers.IgnorePowersOf2IntegerValues value: '1' - - key: readability-magic-numbers.IgnorePowersOf2IntegerValues - value: '1' + - key: cppcoreguidelines-avoid-magic-numbers.IgnoredIntegerValues + value: "0;1;2;3;4;50;100" - key: cppcoreguidelines-avoid-do-while.IgnoreMacros value: 'true' ... diff --git a/README.md b/README.md index e033ef56f..a1b85d33c 100644 --- a/README.md +++ b/README.md @@ -332,7 +332,9 @@ Accessing and modifying the default resource is done through two functions: ```c++ rmm::mr::cuda_memory_resource cuda_mr; // Construct a resource that uses a coalescing best-fit pool allocator -rmm::mr::pool_memory_resource pool_mr{&cuda_mr}; +// With the pool initially half of available device memory +auto initial_size = rmm::percent_of_free_device_memory(50); +rmm::mr::pool_memory_resource pool_mr{&cuda_mr, initial_size}; rmm::mr::set_current_device_resource(&pool_mr); // Updates the current device resource pointer to `pool_mr` rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(); // Points to `pool_mr` ``` @@ -351,11 +353,13 @@ per-device resources. Here is an example loop that creates `unique_ptr`s to `poo objects for each device and sets them as the per-device resource for that device. 
```c++ -std::vector> per_device_pools; +using pool_mr = rmm::mr::pool_memory_resource; +std::vector> per_device_pools; for(int i = 0; i < N; ++i) { cudaSetDevice(i); // set device i before creating MR // Use a vector of unique_ptr to maintain the lifetime of the MRs - per_device_pools.push_back(std::make_unique()); + // Note: for brevity, omitting creation of upstream and computing initial_size + per_device_pools.push_back(std::make_unique(upstream, initial_size)); // Set the per-device resource for device i set_per_device_resource(cuda_device_id{i}, &per_device_pools.back()); } diff --git a/benchmarks/device_uvector/device_uvector_bench.cu b/benchmarks/device_uvector/device_uvector_bench.cu index 454db81a5..8b7f9a5ba 100644 --- a/benchmarks/device_uvector/device_uvector_bench.cu +++ b/benchmarks/device_uvector/device_uvector_bench.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,6 +16,7 @@ #include "../synchronization/synchronization.hpp" +#include #include #include #include @@ -38,7 +39,8 @@ void BM_UvectorSizeConstruction(benchmark::State& state) { rmm::mr::cuda_memory_resource cuda_mr{}; - rmm::mr::pool_memory_resource mr{&cuda_mr}; + rmm::mr::pool_memory_resource mr{ + &cuda_mr, rmm::percent_of_free_device_memory(50)}; rmm::mr::set_current_device_resource(&mr); for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores) @@ -59,7 +61,8 @@ BENCHMARK(BM_UvectorSizeConstruction) void BM_ThrustVectorSizeConstruction(benchmark::State& state) { rmm::mr::cuda_memory_resource cuda_mr{}; - rmm::mr::pool_memory_resource mr{&cuda_mr}; + rmm::mr::pool_memory_resource mr{ + &cuda_mr, rmm::percent_of_free_device_memory(50)}; rmm::mr::set_current_device_resource(&mr); for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores) diff --git a/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu b/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu index 5ed1b31f9..4943e507f 100644 --- a/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu +++ b/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,6 +16,7 @@ #include +#include #include #include #include @@ -100,7 +101,8 @@ inline auto make_cuda_async() { return std::make_shared(make_cuda()); + return rmm::mr::make_owning_wrapper( + make_cuda(), rmm::percent_of_free_device_memory(50)); } inline auto make_arena() diff --git a/benchmarks/random_allocations/random_allocations.cpp b/benchmarks/random_allocations/random_allocations.cpp index 470442830..2856cd323 100644 --- a/benchmarks/random_allocations/random_allocations.cpp +++ b/benchmarks/random_allocations/random_allocations.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ #include +#include #include #include #include @@ -165,12 +166,13 @@ inline auto make_cuda_async() { return std::make_shared(make_cuda()); + return rmm::mr::make_owning_wrapper( + make_cuda(), rmm::percent_of_free_device_memory(50)); } inline auto make_arena() { - auto free = rmm::detail::available_device_memory().first; + auto free = rmm::available_device_memory().first; constexpr auto reserve{64UL << 20}; // Leave some space for CUDA overhead. return rmm::mr::make_owning_wrapper(make_cuda(), free - reserve); } diff --git a/benchmarks/replay/replay.cpp b/benchmarks/replay/replay.cpp index 320811875..253708ace 100644 --- a/benchmarks/replay/replay.cpp +++ b/benchmarks/replay/replay.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -61,7 +61,7 @@ inline auto make_pool(std::size_t simulated_size) return rmm::mr::make_owning_wrapper( make_simulated(simulated_size), simulated_size, simulated_size); } - return rmm::mr::make_owning_wrapper(make_cuda()); + return rmm::mr::make_owning_wrapper(make_cuda(), 0); } inline auto make_arena(std::size_t simulated_size) diff --git a/doxygen/Doxyfile b/doxygen/Doxyfile index 149603f59..513f15875 100644 --- a/doxygen/Doxyfile +++ b/doxygen/Doxyfile @@ -504,7 +504,7 @@ EXTRACT_PACKAGE = NO # included in the documentation. # The default value is: NO. -EXTRACT_STATIC = NO +EXTRACT_STATIC = YES # If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined # locally in source files will be included in the documentation. If set to NO, diff --git a/include/doxygen_groups.h b/include/doxygen_groups.h index be5eaf17f..70ec0cd68 100644 --- a/include/doxygen_groups.h +++ b/include/doxygen_groups.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -41,4 +41,5 @@ * @defgroup errors Errors * @defgroup logging Logging * @defgroup thrust_integrations Thrust Integrations + * @defgroup utilities Utilities */ diff --git a/include/rmm/aligned.hpp b/include/rmm/aligned.hpp new file mode 100644 index 000000000..7a0feaabf --- /dev/null +++ b/include/rmm/aligned.hpp @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +namespace rmm { + +/** + * @addtogroup utilities + * @{ + * @file + */ + +/** + * @brief Default alignment used for host memory allocated by RMM. + * + */ +static constexpr std::size_t RMM_DEFAULT_HOST_ALIGNMENT{alignof(std::max_align_t)}; + +/** + * @brief Default alignment used for CUDA memory allocation. + * + */ +static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256}; + +/** + * @brief Returns whether or not `value` is a power of 2. + * + * @param[in] value to check. + * + * @return Whether the input a power of two with non-negative exponent + */ +constexpr bool is_pow2(std::size_t value) { return (value != 0U) && ((value & (value - 1)) == 0U); } + +/** + * @brief Returns whether or not `alignment` is a valid memory alignment. 
+ * + * @param[in] alignment to check + * + * @return Whether the alignment is valid + */ +constexpr bool is_supported_alignment(std::size_t alignment) { return is_pow2(alignment); } + +/** + * @brief Align up to nearest multiple of specified power of 2 + * + * @param[in] value value to align + * @param[in] alignment amount, in bytes, must be a power of 2 + * + * @return Return the aligned value, as one would expect + */ +constexpr std::size_t align_up(std::size_t value, std::size_t alignment) noexcept +{ + assert(is_supported_alignment(alignment)); + return (value + (alignment - 1)) & ~(alignment - 1); +} + +/** + * @brief Align down to the nearest multiple of specified power of 2 + * + * @param[in] value value to align + * @param[in] alignment amount, in bytes, must be a power of 2 + * + * @return Return the aligned value, as one would expect + */ +constexpr std::size_t align_down(std::size_t value, std::size_t alignment) noexcept +{ + assert(is_supported_alignment(alignment)); + return value & ~(alignment - 1); +} + +/** + * @brief Checks whether a value is aligned to a multiple of a specified power of 2 + * + * @param[in] value value to check for alignment + * @param[in] alignment amount, in bytes, must be a power of 2 + * + * @return true if aligned + */ +constexpr bool is_aligned(std::size_t value, std::size_t alignment) noexcept +{ + assert(is_supported_alignment(alignment)); + return value == align_down(value, alignment); +} + +/** + * @brief Checks whether the provided pointer is aligned to a specified @p alignment + * + * @param[in] ptr pointer to check for alignment + * @param[in] alignment required alignment in bytes, must be a power of 2 + * + * @return true if the pointer is aligned + */ +inline bool is_pointer_aligned(void* ptr, std::size_t alignment = CUDA_ALLOCATION_ALIGNMENT) +{ + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + return is_aligned(reinterpret_cast(ptr), alignment); +} + +/** @} */ // end of group + +} // namespace 
rmm diff --git a/include/rmm/cuda_device.hpp b/include/rmm/cuda_device.hpp index 054bbb920..565d86926 100644 --- a/include/rmm/cuda_device.hpp +++ b/include/rmm/cuda_device.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,6 +15,7 @@ */ #pragma once +#include #include #include @@ -102,6 +103,49 @@ inline int get_num_cuda_devices() return num_dev; } +/** + * @brief Returns the available and total device memory in bytes for the current device + * + * @return The available and total device memory in bytes for the current device as a std::pair. + */ +inline std::pair available_device_memory() +{ + std::size_t free{}; + std::size_t total{}; + RMM_CUDA_TRY(cudaMemGetInfo(&free, &total)); + return {free, total}; +} + +namespace detail { + +/** + * @brief Returns the available and total device memory in bytes for the current device + * + * @deprecated Use rmm::available_device_memory() instead. + * + * @return The available and total device memory in bytes for the current device as a std::pair. + */ +//[[deprecated("Use `rmm::available_device_memory` instead.")]] // +const auto available_device_memory = rmm::available_device_memory; + +} // namespace detail + +/** + * @brief Returns the approximate specified percent of available device memory on the current CUDA + * device, aligned (down) to the nearest CUDA allocation size. + * + * @param percent The percent of free memory to return. + * + * @return The recommended initial device memory pool size in bytes. 
+ */ +inline std::size_t percent_of_free_device_memory(int percent) +{ + [[maybe_unused]] auto const [free, total] = rmm::available_device_memory(); + auto fraction = static_cast(percent) / 100.0; + return rmm::align_down(static_cast(static_cast(free) * fraction), + rmm::CUDA_ALLOCATION_ALIGNMENT); +} + /** * @brief RAII class that sets the current CUDA device to the specified device on construction * and restores the previous device on destruction. diff --git a/include/rmm/detail/aligned.hpp b/include/rmm/detail/aligned.hpp index 321be53b5..54d287bfb 100644 --- a/include/rmm/detail/aligned.hpp +++ b/include/rmm/detail/aligned.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -40,7 +40,7 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256}; * @brief Returns whether or not `n` is a power of 2. * */ -constexpr bool is_pow2(std::size_t value) { return (0 == (value & (value - 1))); } +constexpr bool is_pow2(std::size_t value) { return (value != 0U) && ((value & (value - 1)) == 0U); } /** * @brief Returns whether or not `alignment` is a valid memory alignment. 
@@ -51,7 +51,7 @@ constexpr bool is_supported_alignment(std::size_t alignment) { return is_pow2(al /** * @brief Align up to nearest multiple of specified power of 2 * - * @param[in] v value to align + * @param[in] value value to align * @param[in] alignment amount, in bytes, must be a power of 2 * * @return Return the aligned value, as one would expect @@ -65,7 +65,7 @@ constexpr std::size_t align_up(std::size_t value, std::size_t alignment) noexcep /** * @brief Align down to the nearest multiple of specified power of 2 * - * @param[in] v value to align + * @param[in] value value to align * @param[in] alignment amount, in bytes, must be a power of 2 * * @return Return the aligned value, as one would expect @@ -79,7 +79,7 @@ constexpr std::size_t align_down(std::size_t value, std::size_t alignment) noexc /** * @brief Checks whether a value is aligned to a multiple of a specified power of 2 * - * @param[in] v value to check for alignment + * @param[in] value value to check for alignment * @param[in] alignment amount, in bytes, must be a power of 2 * * @return true if aligned @@ -93,7 +93,7 @@ constexpr bool is_aligned(std::size_t value, std::size_t alignment) noexcept inline bool is_pointer_aligned(void* ptr, std::size_t alignment = CUDA_ALLOCATION_ALIGNMENT) { // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - return rmm::detail::is_aligned(reinterpret_cast(ptr), alignment); + return rmm::detail::is_aligned(reinterpret_cast(ptr), alignment); } /** diff --git a/include/rmm/detail/cuda_util.hpp b/include/rmm/detail/cuda_util.hpp deleted file mode 100644 index 613b8d156..000000000 --- a/include/rmm/detail/cuda_util.hpp +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include - -namespace rmm::detail { - -/// Gets the available and total device memory in bytes for the current device -inline std::pair available_device_memory() -{ - std::size_t free{}; - std::size_t total{}; - RMM_CUDA_TRY(cudaMemGetInfo(&free, &total)); - return {free, total}; -} - -} // namespace rmm::detail diff --git a/include/rmm/mr/device/aligned_resource_adaptor.hpp b/include/rmm/mr/device/aligned_resource_adaptor.hpp index 05e9915cc..be7c3036c 100644 --- a/include/rmm/mr/device/aligned_resource_adaptor.hpp +++ b/include/rmm/mr/device/aligned_resource_adaptor.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,8 +15,8 @@ */ #pragma once +#include #include -#include #include #include @@ -65,12 +65,12 @@ class aligned_resource_adaptor final : public device_memory_resource { * are aligned. 
*/ explicit aligned_resource_adaptor(Upstream* upstream, - std::size_t alignment = rmm::detail::CUDA_ALLOCATION_ALIGNMENT, + std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT, std::size_t alignment_threshold = default_alignment_threshold) : upstream_{upstream}, alignment_{alignment}, alignment_threshold_{alignment_threshold} { RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); - RMM_EXPECTS(rmm::detail::is_supported_alignment(alignment), + RMM_EXPECTS(rmm::is_supported_alignment(alignment), "Allocation alignment is not a power of 2."); } @@ -127,14 +127,14 @@ class aligned_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - if (alignment_ == rmm::detail::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) { + if (alignment_ == rmm::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) { return upstream_->allocate(bytes, stream); } auto const size = upstream_allocation_size(bytes); void* pointer = upstream_->allocate(size, stream); // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) auto const address = reinterpret_cast(pointer); - auto const aligned_address = rmm::detail::align_up(address, alignment_); + auto const aligned_address = rmm::align_up(address, alignment_); // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast,performance-no-int-to-ptr) void* aligned_pointer = reinterpret_cast(aligned_address); if (pointer != aligned_pointer) { @@ -153,7 +153,7 @@ class aligned_resource_adaptor final : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - if (alignment_ == rmm::detail::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) { + if (alignment_ == rmm::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) { upstream_->deallocate(ptr, bytes, stream); } else { { @@ -208,8 +208,8 @@ class aligned_resource_adaptor final : public 
device_memory_resource { */ std::size_t upstream_allocation_size(std::size_t bytes) const { - auto const aligned_size = rmm::detail::align_up(bytes, alignment_); - return aligned_size + alignment_ - rmm::detail::CUDA_ALLOCATION_ALIGNMENT; + auto const aligned_size = rmm::align_up(bytes, alignment_); + return aligned_size + alignment_ - rmm::CUDA_ALLOCATION_ALIGNMENT; } Upstream* upstream_; ///< The upstream resource used for satisfying allocation requests diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index 929b8454f..1b821b440 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,6 +15,7 @@ */ #pragma once +#include #include #include #include @@ -145,7 +146,7 @@ class arena_memory_resource final : public device_memory_resource { #ifdef RMM_ARENA_USE_SIZE_CLASSES bytes = rmm::mr::detail::arena::align_to_size_class(bytes); #else - bytes = rmm::detail::align_up(bytes, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + bytes = rmm::align_up(bytes, rmm::CUDA_ALLOCATION_ALIGNMENT); #endif auto& arena = get_arena(stream); @@ -195,7 +196,7 @@ class arena_memory_resource final : public device_memory_resource { #ifdef RMM_ARENA_USE_SIZE_CLASSES bytes = rmm::mr::detail::arena::align_to_size_class(bytes); #else - bytes = rmm::detail::align_up(bytes, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + bytes = rmm::align_up(bytes, rmm::CUDA_ALLOCATION_ALIGNMENT); #endif auto& arena = get_arena(stream); diff --git a/include/rmm/mr/device/binning_memory_resource.hpp b/include/rmm/mr/device/binning_memory_resource.hpp index c2e1621a6..2a9975b18 100644 --- a/include/rmm/mr/device/binning_memory_resource.hpp +++ 
b/include/rmm/mr/device/binning_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,7 +15,7 @@ */ #pragma once -#include +#include #include #include @@ -138,8 +138,7 @@ class binning_memory_resource final : public device_memory_resource { */ void add_bin(std::size_t allocation_size, device_memory_resource* bin_resource = nullptr) { - allocation_size = - rmm::detail::align_up(allocation_size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + allocation_size = rmm::align_up(allocation_size, rmm::CUDA_ALLOCATION_ALIGNMENT); if (nullptr != bin_resource) { resource_bins_.insert({allocation_size, bin_resource}); diff --git a/include/rmm/mr/device/cuda_async_memory_resource.hpp b/include/rmm/mr/device/cuda_async_memory_resource.hpp index de31c7dc4..f8295c6f6 100644 --- a/include/rmm/mr/device/cuda_async_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_async_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,7 +17,6 @@ #include #include -#include #include #include #include @@ -120,7 +119,7 @@ class cuda_async_memory_resource final : public device_memory_resource { pool_handle(), cudaMemPoolReuseAllowOpportunistic, &disabled)); } - auto const [free, total] = rmm::detail::available_device_memory(); + auto const [free, total] = rmm::available_device_memory(); // Need an l-value to take address to pass to cudaMemPoolSetAttribute uint64_t threshold = release_threshold.value_or(total); diff --git a/include/rmm/mr/device/cuda_async_view_memory_resource.hpp b/include/rmm/mr/device/cuda_async_view_memory_resource.hpp index 825fcab1e..562944669 100644 --- a/include/rmm/mr/device/cuda_async_view_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_async_view_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,7 +17,6 @@ #include #include -#include #include #include #include diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index d8da58493..c7965ca34 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,9 +16,9 @@ #pragma once +#include +#include #include -#include -#include #include #include #include @@ -508,8 +508,8 @@ class global_arena final { : upstream_mr_{upstream_mr} { RMM_EXPECTS(nullptr != upstream_mr_, "Unexpected null upstream pointer."); - auto const size = rmm::detail::align_down(arena_size.value_or(default_size()), - rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + auto const size = + rmm::align_down(arena_size.value_or(default_size()), rmm::CUDA_ALLOCATION_ALIGNMENT); RMM_EXPECTS(size >= superblock::minimum_size, "Arena size smaller than minimum superblock size."); initialize(size); @@ -692,7 +692,7 @@ class global_arena final { */ constexpr std::size_t default_size() const { - auto const [free, total] = rmm::detail::available_device_memory(); + auto const [free, total] = rmm::available_device_memory(); return free / 2; } diff --git a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp index a57bf1c6d..1d6829cb5 100644 --- a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp +++ b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -15,8 +15,8 @@ */ #pragma once +#include #include -#include #include #include #include @@ -207,7 +207,7 @@ class stream_ordered_memory_resource : public crtp, public device_ auto stream_event = get_event(stream); - size = rmm::detail::align_up(size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + size = rmm::align_up(size, rmm::CUDA_ALLOCATION_ALIGNMENT); RMM_EXPECTS(size <= this->underlying().get_maximum_allocation_size(), "Maximum allocation size exceeded", rmm::out_of_memory); @@ -241,7 +241,7 @@ class stream_ordered_memory_resource : public crtp, public device_ lock_guard lock(mtx_); auto stream_event = get_event(stream); - size = rmm::detail::align_up(size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + size = rmm::align_up(size, rmm::CUDA_ALLOCATION_ALIGNMENT); auto const block = this->underlying().free_block(ptr, size); // TODO: cudaEventRecord has significant overhead on deallocations. For the non-PTDS case diff --git a/include/rmm/mr/device/device_memory_resource.hpp b/include/rmm/mr/device/device_memory_resource.hpp index 63e5f39a4..e3014b6c3 100644 --- a/include/rmm/mr/device/device_memory_resource.hpp +++ b/include/rmm/mr/device/device_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -78,10 +78,12 @@ namespace rmm::mr { * device. 
* * @code{.cpp} - * std::vector> per_device_pools; + * using pool_mr = rmm::mr::pool_memory_resource; + * std::vector> per_device_pools; * for(int i = 0; i < N; ++i) { * cudaSetDevice(i); - * per_device_pools.push_back(std::make_unique()); + * // Note: for brevity, omitting creation of upstream and computing initial_size + * per_device_pools.push_back(std::make_unique(upstream, initial_size)); * set_per_device_resource(cuda_device_id{i}, &per_device_pools.back()); * } * @endcode diff --git a/include/rmm/mr/device/fixed_size_memory_resource.hpp b/include/rmm/mr/device/fixed_size_memory_resource.hpp index 01fb8a6bc..91cc95c53 100644 --- a/include/rmm/mr/device/fixed_size_memory_resource.hpp +++ b/include/rmm/mr/device/fixed_size_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -15,8 +15,8 @@ */ #pragma once +#include #include -#include #include #include #include @@ -77,7 +77,7 @@ class fixed_size_memory_resource std::size_t block_size = default_block_size, std::size_t blocks_to_preallocate = default_blocks_to_preallocate) : upstream_mr_{upstream_mr}, - block_size_{rmm::detail::align_up(block_size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT)}, + block_size_{rmm::align_up(block_size, rmm::CUDA_ALLOCATION_ALIGNMENT)}, upstream_chunk_size_{block_size * blocks_to_preallocate} { // allocate initial blocks and insert into free list @@ -207,8 +207,7 @@ class fixed_size_memory_resource { // Deallocating a fixed-size block just inserts it in the free list, which is // handled by the parent class - RMM_LOGGING_ASSERT(rmm::detail::align_up(size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT) <= - block_size_); + RMM_LOGGING_ASSERT(rmm::align_up(size, rmm::CUDA_ALLOCATION_ALIGNMENT) <= block_size_); return block_type{ptr}; } diff --git a/include/rmm/mr/device/limiting_resource_adaptor.hpp b/include/rmm/mr/device/limiting_resource_adaptor.hpp index 6573956d0..2123c3cac 100644 --- a/include/rmm/mr/device/limiting_resource_adaptor.hpp +++ b/include/rmm/mr/device/limiting_resource_adaptor.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -15,7 +15,7 @@ */ #pragma once -#include +#include #include #include @@ -54,7 +54,7 @@ class limiting_resource_adaptor final : public device_memory_resource { */ limiting_resource_adaptor(Upstream* upstream, std::size_t allocation_limit, - std::size_t alignment = rmm::detail::CUDA_ALLOCATION_ALIGNMENT) + std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT) : allocation_limit_{allocation_limit}, allocated_bytes_(0), alignment_(alignment), @@ -134,7 +134,7 @@ class limiting_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - auto const proposed_size = rmm::detail::align_up(bytes, alignment_); + auto const proposed_size = rmm::align_up(bytes, alignment_); auto const old = allocated_bytes_.fetch_add(proposed_size); if (old + proposed_size <= allocation_limit_) { try { @@ -158,7 +158,7 @@ class limiting_resource_adaptor final : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - std::size_t allocated_size = rmm::detail::align_up(bytes, alignment_); + std::size_t allocated_size = rmm::align_up(bytes, alignment_); upstream_->deallocate(ptr, bytes, stream); allocated_bytes_ -= allocated_size; } diff --git a/include/rmm/mr/device/per_device_resource.hpp b/include/rmm/mr/device/per_device_resource.hpp index 139389f0c..a56a784a1 100644 --- a/include/rmm/mr/device/per_device_resource.hpp +++ b/include/rmm/mr/device/per_device_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -69,6 +69,16 @@ * set_per_device_resource(cuda_device_id{i}, &per_device_pools.back()); * } * @endcode + * @code{.cpp} + * using pool_mr = rmm::mr::pool_memory_resource; + * std::vector> per_device_pools; + * for(int i = 0; i < N; ++i) { + * cudaSetDevice(i); + * // Note: for brevity, omitting creation of upstream and computing initial_size + * per_device_pools.push_back(std::make_unique(upstream, initial_size)); + * set_per_device_resource(cuda_device_id{i}, &per_device_pools.back()); + * } + * @endcode */ namespace rmm::mr { diff --git a/include/rmm/mr/device/pool_memory_resource.hpp b/include/rmm/mr/device/pool_memory_resource.hpp index 20b250524..c0317cf57 100644 --- a/include/rmm/mr/device/pool_memory_resource.hpp +++ b/include/rmm/mr/device/pool_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,9 +15,8 @@ */ #pragma once +#include #include -#include -#include #include #include #include @@ -110,10 +109,37 @@ class pool_memory_resource final friend class detail::stream_ordered_memory_resource, detail::coalescing_free_list>; + /** + * @brief Construct a `pool_memory_resource` and allocate the initial device memory + * pool using `upstream_mr`. + * + * @deprecated Use the constructor that takes an explicit initial pool size instead. + * + * @throws rmm::logic_error if `upstream_mr == nullptr` + * @throws rmm::logic_error if `initial_pool_size` is neither the default nor aligned to a + * multiple of pool_memory_resource::allocation_alignment bytes. + * @throws rmm::logic_error if `maximum_pool_size` is neither the default nor aligned to a + * multiple of pool_memory_resource::allocation_alignment bytes. + * + * @param upstream_mr The memory_resource from which to allocate blocks for the pool. 
+ * @param initial_pool_size Minimum size, in bytes, of the initial pool. Defaults to zero. + * @param maximum_pool_size Maximum size, in bytes, that the pool can grow to. Defaults to all + * of the available memory from the upstream resource. + */ + //[[deprecated("Must specify initial_pool_size")]] // + explicit pool_memory_resource(Upstream* upstream_mr, + thrust::optional initial_pool_size = thrust::nullopt, + thrust::optional maximum_pool_size = thrust::nullopt) + : pool_memory_resource(upstream_mr, initial_pool_size.value_or(0), maximum_pool_size) + { + } + /** * @brief Construct a `pool_memory_resource` and allocate the initial device memory pool using * `upstream_mr`. * + * @deprecated Use the constructor that takes an explicit initial pool size instead. + * * @throws rmm::logic_error if `upstream_mr == nullptr` * @throws rmm::logic_error if `initial_pool_size` is neither the default nor aligned to a * multiple of pool_memory_resource::allocation_alignment bytes. @@ -121,24 +147,46 @@ class pool_memory_resource final * multiple of pool_memory_resource::allocation_alignment bytes. * * @param upstream_mr The memory_resource from which to allocate blocks for the pool. - * @param initial_pool_size Minimum size, in bytes, of the initial pool. Defaults to half of the - * available memory on the current device. + * @param initial_pool_size Minimum size, in bytes, of the initial pool. Defaults to zero. * @param maximum_pool_size Maximum size, in bytes, that the pool can grow to. Defaults to all - * of the available memory on the current device. + * of the available memory from the upstream resource. 
 */ - explicit pool_memory_resource(Upstream* upstream_mr, + template , int> = 0> + //[[deprecated("Must specify initial_pool_size")]] // + explicit pool_memory_resource(Upstream2& upstream_mr, thrust::optional initial_pool_size = thrust::nullopt, thrust::optional maximum_pool_size = thrust::nullopt) + : pool_memory_resource(upstream_mr, initial_pool_size.value_or(0), maximum_pool_size) + { + } + + /** + * @brief Construct a `pool_memory_resource` and allocate the initial device memory pool using + * `upstream_mr`. + * + * @throws rmm::logic_error if `upstream_mr == nullptr` + * @throws rmm::logic_error if `initial_pool_size` is not aligned to a multiple of + * pool_memory_resource::allocation_alignment bytes. + * @throws rmm::logic_error if `maximum_pool_size` is neither the default nor aligned to a + * multiple of pool_memory_resource::allocation_alignment bytes. + * + * @param upstream_mr The memory_resource from which to allocate blocks for the pool. + * @param initial_pool_size Minimum size, in bytes, of the initial pool. + * @param maximum_pool_size Maximum size, in bytes, that the pool can grow to. Defaults to all + * of the available memory from the upstream resource. 
+ */ + explicit pool_memory_resource(Upstream* upstream_mr, + std::size_t initial_pool_size, + thrust::optional maximum_pool_size = thrust::nullopt) : upstream_mr_{[upstream_mr]() { RMM_EXPECTS(nullptr != upstream_mr, "Unexpected null upstream pointer."); return upstream_mr; }()} { - RMM_EXPECTS(rmm::detail::is_aligned(initial_pool_size.value_or(0), - rmm::detail::CUDA_ALLOCATION_ALIGNMENT), + RMM_EXPECTS(rmm::is_aligned(initial_pool_size, rmm::CUDA_ALLOCATION_ALIGNMENT), "Error, Initial pool size required to be a multiple of 256 bytes"); - RMM_EXPECTS(rmm::detail::is_aligned(maximum_pool_size.value_or(0), - rmm::detail::CUDA_ALLOCATION_ALIGNMENT), + RMM_EXPECTS(rmm::is_aligned(maximum_pool_size.value_or(0), rmm::CUDA_ALLOCATION_ALIGNMENT), "Error, Maximum pool size required to be a multiple of 256 bytes"); initialize_pool(initial_pool_size, maximum_pool_size); @@ -149,21 +197,20 @@ class pool_memory_resource final * `upstream_mr`. * * @throws rmm::logic_error if `upstream_mr == nullptr` - * @throws rmm::logic_error if `initial_pool_size` is neither the default nor aligned to a - * multiple of pool_memory_resource::allocation_alignment bytes. + * @throws rmm::logic_error if `initial_pool_size` is not aligned to a multiple of + * pool_memory_resource::allocation_alignment bytes. * @throws rmm::logic_error if `maximum_pool_size` is neither the default nor aligned to a * multiple of pool_memory_resource::allocation_alignment bytes. * * @param upstream_mr The memory_resource from which to allocate blocks for the pool. - * @param initial_pool_size Minimum size, in bytes, of the initial pool. Defaults to half of the - * available memory on the current device. + * @param initial_pool_size Minimum size, in bytes, of the initial pool. * @param maximum_pool_size Maximum size, in bytes, that the pool can grow to. Defaults to all - * of the available memory on the current device. + * of the available memory from the upstream resource. 
*/ template , int> = 0> explicit pool_memory_resource(Upstream2& upstream_mr, - thrust::optional initial_pool_size = thrust::nullopt, + std::size_t initial_pool_size, thrust::optional maximum_pool_size = thrust::nullopt) : pool_memory_resource(cuda::std::addressof(upstream_mr), initial_pool_size, maximum_pool_size) { @@ -276,38 +323,22 @@ class pool_memory_resource final /** * @brief Allocate initial memory for the pool * - * If initial_size is unset, then queries the upstream memory resource for available memory if - * upstream supports `get_mem_info`, or queries the device (using CUDA API) for available memory - * if not. Then attempts to initialize to half the available memory. - * - * If initial_size is set, then tries to initialize the pool to that size. - * * @param initial_size The optional initial size for the pool * @param maximum_size The optional maximum size for the pool + * + * @throws logic_error if @p initial_size is larger than @p maximum_size (if set). */ - // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) - void initialize_pool(thrust::optional initial_size, - thrust::optional maximum_size) + void initialize_pool(std::size_t initial_size, thrust::optional maximum_size) { - auto const try_size = [&]() { - if (not initial_size.has_value()) { - auto const [free, total] = (get_upstream()->supports_get_mem_info()) - ? 
get_upstream()->get_mem_info(cuda_stream_legacy) - : rmm::detail::available_device_memory(); - return rmm::detail::align_up(std::min(free, total / 2), - rmm::detail::CUDA_ALLOCATION_ALIGNMENT); - } - return initial_size.value(); - }(); - current_pool_size_ = 0; // try_to_expand will set this if it succeeds maximum_pool_size_ = maximum_size; - RMM_EXPECTS(try_size <= maximum_pool_size_.value_or(std::numeric_limits::max()), - "Initial pool size exceeds the maximum pool size!"); + RMM_EXPECTS( + initial_size <= maximum_pool_size_.value_or(std::numeric_limits::max()), + "Initial pool size exceeds the maximum pool size!"); - if (try_size > 0) { - auto const block = try_to_expand(try_size, try_size, cuda_stream_legacy); + if (initial_size > 0) { + auto const block = try_to_expand(initial_size, initial_size, cuda_stream_legacy); this->insert_block(block, cuda_stream_legacy); } } @@ -346,9 +377,9 @@ class pool_memory_resource final { if (maximum_pool_size_.has_value()) { auto const unaligned_remaining = maximum_pool_size_.value() - pool_size(); - using rmm::detail::align_up; - auto const remaining = align_up(unaligned_remaining, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); - auto const aligned_size = align_up(size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + using rmm::align_up; + auto const remaining = align_up(unaligned_remaining, rmm::CUDA_ALLOCATION_ALIGNMENT); + auto const aligned_size = align_up(size, rmm::CUDA_ALLOCATION_ALIGNMENT); return (aligned_size <= remaining) ? 
std::max(aligned_size, remaining / 2) : 0; } return std::max(size, pool_size()); @@ -416,7 +447,7 @@ class pool_memory_resource final RMM_LOGGING_ASSERT(iter != allocated_blocks_.end()); auto block = *iter; - RMM_LOGGING_ASSERT(block.size() == rmm::detail::align_up(size, allocation_alignment)); + RMM_LOGGING_ASSERT(block.size() == rmm::align_up(size, allocation_alignment)); allocated_blocks_.erase(iter); return block; diff --git a/include/rmm/mr/host/new_delete_resource.hpp b/include/rmm/mr/host/new_delete_resource.hpp index 044f74063..4bb272df3 100644 --- a/include/rmm/mr/host/new_delete_resource.hpp +++ b/include/rmm/mr/host/new_delete_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #include +#include #include #include @@ -58,12 +59,11 @@ class new_delete_resource final : public host_memory_resource { * @return Pointer to the newly allocated memory */ void* do_allocate(std::size_t bytes, - std::size_t alignment = rmm::detail::RMM_DEFAULT_HOST_ALIGNMENT) override + std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) override { // If the requested alignment isn't supported, use default - alignment = (rmm::detail::is_supported_alignment(alignment)) - ? alignment - : rmm::detail::RMM_DEFAULT_HOST_ALIGNMENT; + alignment = + (rmm::is_supported_alignment(alignment)) ? 
alignment : rmm::RMM_DEFAULT_HOST_ALIGNMENT; return rmm::detail::aligned_allocate( bytes, alignment, [](std::size_t size) { return ::operator new(size); }); @@ -84,7 +84,7 @@ class new_delete_resource final : public host_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, - std::size_t alignment = rmm::detail::RMM_DEFAULT_HOST_ALIGNMENT) override + std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) override { rmm::detail::aligned_deallocate( ptr, bytes, alignment, [](void* ptr) { ::operator delete(ptr); }); diff --git a/include/rmm/mr/host/pinned_memory_resource.hpp b/include/rmm/mr/host/pinned_memory_resource.hpp index e49767faf..b5c273ef5 100644 --- a/include/rmm/mr/host/pinned_memory_resource.hpp +++ b/include/rmm/mr/host/pinned_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,6 +15,7 @@ */ #pragma once +#include #include #include #include @@ -114,7 +115,7 @@ class pinned_memory_resource final : public host_memory_resource { */ void deallocate_async(void* ptr, std::size_t bytes, std::size_t alignment, cuda_stream_view) { - do_deallocate(ptr, rmm::detail::align_up(bytes, alignment)); + do_deallocate(ptr, rmm::align_up(bytes, alignment)); } /** @@ -143,9 +144,8 @@ class pinned_memory_resource final : public host_memory_resource { if (0 == bytes) { return nullptr; } // If the requested alignment isn't supported, use default - alignment = (rmm::detail::is_supported_alignment(alignment)) - ? alignment - : rmm::detail::RMM_DEFAULT_HOST_ALIGNMENT; + alignment = + (rmm::is_supported_alignment(alignment)) ? 
alignment : rmm::RMM_DEFAULT_HOST_ALIGNMENT; return rmm::detail::aligned_allocate(bytes, alignment, [](std::size_t size) { void* ptr{nullptr}; diff --git a/python/docs/conf.py b/python/docs/conf.py index ba5aa6d20..b4c141eb4 100644 --- a/python/docs/conf.py +++ b/python/docs/conf.py @@ -248,14 +248,16 @@ def on_missing_reference(app, env, node, contnode): if match := re.search("(.*)<.*>", reftarget): reftarget = match.group(1) + # This is the document we're linking _from_, and hence where + # we should try and resolve the xref wrt. + refdoc = node.get("refdoc") # Try to find the target prefixed with e.g. namespaces in case that's # all that's missing. Include the empty prefix in case we're searching # for a stripped template. extra_prefixes = ["rmm::", "rmm::mr::", "mr::", ""] - for (name, dispname, type, docname, anchor, priority) in env.domains[ + for (name, dispname, typ, docname, anchor, priority) in env.domains[ "cpp" ].get_objects(): - for prefix in extra_prefixes: if ( name == f"{prefix}{reftarget}" @@ -263,7 +265,7 @@ def on_missing_reference(app, env, node, contnode): ): return env.domains["cpp"].resolve_xref( env, - docname, + refdoc, app.builder, node["reftype"], name, diff --git a/python/docs/librmm_docs/deprecated.rst b/python/docs/librmm_docs/deprecated.rst new file mode 100644 index 000000000..b5ed90caa --- /dev/null +++ b/python/docs/librmm_docs/deprecated.rst @@ -0,0 +1,5 @@ +Deprecated functionality +======================== + +.. doxygenpage:: deprecated + :content-only: diff --git a/python/docs/librmm_docs/index.rst b/python/docs/librmm_docs/index.rst index 6afd94d2e..2b61deb9f 100644 --- a/python/docs/librmm_docs/index.rst +++ b/python/docs/librmm_docs/index.rst @@ -17,6 +17,8 @@ librmm Documentation cuda_streams errors logging + utilities + deprecated .. 
doxygennamespace:: rmm diff --git a/python/docs/librmm_docs/utilities.rst b/python/docs/librmm_docs/utilities.rst new file mode 100644 index 000000000..25b455746 --- /dev/null +++ b/python/docs/librmm_docs/utilities.rst @@ -0,0 +1,5 @@ +Utilities +============ + +.. doxygengroup:: utilities + :members: diff --git a/python/rmm/_lib/memory_resource.pyx b/python/rmm/_lib/memory_resource.pyx index ce7f45e19..690e2e338 100644 --- a/python/rmm/_lib/memory_resource.pyx +++ b/python/rmm/_lib/memory_resource.pyx @@ -120,12 +120,15 @@ cdef extern from "rmm/mr/device/cuda_async_memory_resource.hpp" \ win32 win32_kmt +cdef extern from "rmm/cuda_device.hpp" namespace "rmm" nogil: + size_t percent_of_free_device_memory(int percent) except + + cdef extern from "rmm/mr/device/pool_memory_resource.hpp" \ namespace "rmm::mr" nogil: cdef cppclass pool_memory_resource[Upstream](device_memory_resource): pool_memory_resource( Upstream* upstream_mr, - optional[size_t] initial_pool_size, + size_t initial_pool_size, optional[size_t] maximum_pool_size) except + size_t pool_size() @@ -369,12 +372,12 @@ cdef class PoolMemoryResource(UpstreamResourceAdaptor): initial_pool_size=None, maximum_pool_size=None ): - cdef optional[size_t] c_initial_pool_size + cdef size_t c_initial_pool_size cdef optional[size_t] c_maximum_pool_size c_initial_pool_size = ( - optional[size_t]() if + percent_of_free_device_memory(50) if initial_pool_size is None - else make_optional[size_t](initial_pool_size) + else initial_pool_size ) c_maximum_pool_size = ( optional[size_t]() if diff --git a/tests/container_multidevice_tests.cu b/tests/container_multidevice_tests.cu index 9de9ddf40..e58ba53a2 100644 --- a/tests/container_multidevice_tests.cu +++ b/tests/container_multidevice_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,12 +15,12 @@ */ #include "device_check_resource_adaptor.hpp" -#include "rmm/mr/device/per_device_resource.hpp" #include #include #include #include +#include #include diff --git a/tests/mr/device/aligned_mr_tests.cpp b/tests/mr/device/aligned_mr_tests.cpp index dfcdfa72f..5fbb4b8f1 100644 --- a/tests/mr/device/aligned_mr_tests.cpp +++ b/tests/mr/device/aligned_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,7 +15,8 @@ */ #include "../../mock_resource.hpp" -#include + +#include #include #include #include @@ -223,7 +224,7 @@ TEST(AlignedTest, AlignRealPointer) auto const threshold{65536}; aligned_real mr{rmm::mr::get_current_device_resource(), alignment, threshold}; void* alloc = mr.allocate(threshold); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(alloc, alignment)); + EXPECT_TRUE(rmm::is_pointer_aligned(alloc, alignment)); mr.deallocate(alloc, threshold); } diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index 7525cac9f..1068e0cf0 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -15,9 +15,10 @@ */ #include "../../byte_literals.hpp" + +#include +#include #include -#include -#include #include #include #include @@ -487,10 +488,9 @@ TEST_F(ArenaTest, SizeSmallerThanSuperblockSize) // NOLINT TEST_F(ArenaTest, AllocateNinetyPercent) // NOLINT { EXPECT_NO_THROW([]() { // NOLINT(cppcoreguidelines-avoid-goto) - auto const free = rmm::detail::available_device_memory().first; - auto const ninety_percent = - rmm::detail::align_up(static_cast(static_cast(free) * 0.9), - rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + auto const free = rmm::available_device_memory().first; + auto const ninety_percent = rmm::align_up( + static_cast(static_cast(free) * 0.9), rmm::CUDA_ALLOCATION_ALIGNMENT); arena_mr mr(rmm::mr::get_current_device_resource(), ninety_percent); }()); } @@ -501,7 +501,7 @@ TEST_F(ArenaTest, SmallMediumLarge) // NOLINT arena_mr mr(rmm::mr::get_current_device_resource()); auto* small = mr.allocate(256); auto* medium = mr.allocate(64_MiB); - auto const free = rmm::detail::available_device_memory().first; + auto const free = rmm::available_device_memory().first; auto* large = mr.allocate(free / 3); mr.deallocate(small, 256); mr.deallocate(medium, 64_MiB); diff --git a/tests/mr/device/failure_callback_mr_tests.cpp b/tests/mr/device/failure_callback_mr_tests.cpp index bb5484c69..79acd5c7e 100644 --- a/tests/mr/device/failure_callback_mr_tests.cpp +++ b/tests/mr/device/failure_callback_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -15,16 +15,17 @@ */ #include "../../byte_literals.hpp" -#include "rmm/cuda_stream_view.hpp" -#include "rmm/mr/device/device_memory_resource.hpp" -#include +#include #include #include +#include #include #include +#include + namespace rmm::test { namespace { diff --git a/tests/mr/device/mr_ref_test.hpp b/tests/mr/device/mr_ref_test.hpp index 804c710a5..25ff76891 100644 --- a/tests/mr/device/mr_ref_test.hpp +++ b/tests/mr/device/mr_ref_test.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,9 +18,10 @@ #include "../../byte_literals.hpp" +#include +#include #include #include -#include #include #include #include @@ -78,7 +79,7 @@ inline void test_allocate(resource_ref ref, std::size_t bytes) try { void* ptr = ref.allocate(bytes); EXPECT_NE(nullptr, ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(ptr)); EXPECT_TRUE(is_device_memory(ptr)); ref.deallocate(ptr, bytes); } catch (rmm::out_of_memory const& e) { @@ -94,7 +95,7 @@ inline void test_allocate_async(async_resource_ref ref, void* ptr = ref.allocate_async(bytes, stream); if (not stream.is_default()) { stream.synchronize(); } EXPECT_NE(nullptr, ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(ptr)); EXPECT_TRUE(is_device_memory(ptr)); ref.deallocate_async(ptr, bytes, stream); if (not stream.is_default()) { stream.synchronize(); } @@ -202,7 +203,7 @@ inline void test_random_allocations(resource_ref ref, alloc.size = distribution(generator); EXPECT_NO_THROW(alloc.ptr = ref.allocate(alloc.size)); EXPECT_NE(nullptr, alloc.ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(alloc.ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(alloc.ptr)); }); std::for_each(allocations.begin(), allocations.end(), [&ref](allocation& alloc) { 
@@ -228,7 +229,7 @@ inline void test_random_async_allocations(async_resource_ref ref, EXPECT_NO_THROW(alloc.ptr = ref.allocate(alloc.size)); if (not stream.is_default()) { stream.synchronize(); } EXPECT_NE(nullptr, alloc.ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(alloc.ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(alloc.ptr)); }); std::for_each(allocations.begin(), allocations.end(), [stream, &ref](allocation& alloc) { @@ -269,7 +270,7 @@ inline void test_mixed_random_allocation_free(resource_ref ref, EXPECT_NO_THROW(allocations.emplace_back(ref.allocate(size), size)); auto new_allocation = allocations.back(); EXPECT_NE(nullptr, new_allocation.ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(new_allocation.ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(new_allocation.ptr)); } else { auto const index = static_cast(index_distribution(generator) % active_allocations); active_allocations--; @@ -316,7 +317,7 @@ inline void test_mixed_random_async_allocation_free(async_resource_ref ref, EXPECT_NO_THROW(allocations.emplace_back(ref.allocate_async(size, stream), size)); auto new_allocation = allocations.back(); EXPECT_NE(nullptr, new_allocation.ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(new_allocation.ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(new_allocation.ptr)); } else { auto const index = static_cast(index_distribution(generator) % active_allocations); active_allocations--; @@ -379,7 +380,8 @@ inline auto make_managed() { return std::make_shared(make_cuda()); + return rmm::mr::make_owning_wrapper( + make_cuda(), rmm::percent_of_free_device_memory(50)); } inline auto make_arena() diff --git a/tests/mr/device/mr_test.hpp b/tests/mr/device/mr_test.hpp index 03f880e72..ef4b4bc80 100644 --- a/tests/mr/device/mr_test.hpp +++ b/tests/mr/device/mr_test.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,9 +18,10 @@ #include "../../byte_literals.hpp" +#include +#include #include #include -#include #include #include #include @@ -74,7 +75,7 @@ inline void test_get_current_device_resource() EXPECT_NE(nullptr, rmm::mr::get_current_device_resource()); void* ptr = rmm::mr::get_current_device_resource()->allocate(1_MiB); EXPECT_NE(nullptr, ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(ptr)); EXPECT_TRUE(is_device_memory(ptr)); rmm::mr::get_current_device_resource()->deallocate(ptr, 1_MiB); } @@ -86,7 +87,7 @@ inline void test_allocate(rmm::mr::device_memory_resource* mr, void* ptr = mr->allocate(bytes); if (not stream.is_default()) { stream.synchronize(); } EXPECT_NE(nullptr, ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(ptr)); EXPECT_TRUE(is_device_memory(ptr)); mr->deallocate(ptr, bytes); if (not stream.is_default()) { stream.synchronize(); } @@ -154,7 +155,7 @@ inline void test_random_allocations(rmm::mr::device_memory_resource* mr, EXPECT_NO_THROW(alloc.ptr = mr->allocate(alloc.size, stream)); if (not stream.is_default()) { stream.synchronize(); } EXPECT_NE(nullptr, alloc.ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(alloc.ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(alloc.ptr)); }); std::for_each(allocations.begin(), allocations.end(), [stream, mr](allocation& alloc) { @@ -196,7 +197,7 @@ inline void test_mixed_random_allocation_free(rmm::mr::device_memory_resource* m EXPECT_NO_THROW(allocations.emplace_back(mr->allocate(size, stream), size)); auto new_allocation = allocations.back(); EXPECT_NE(nullptr, new_allocation.ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(new_allocation.ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(new_allocation.ptr)); } else { auto const index = static_cast(index_distribution(generator) % 
active_allocations); active_allocations--; @@ -257,7 +258,8 @@ inline auto make_managed() { return std::make_shared(make_cuda()); + return rmm::mr::make_owning_wrapper( + make_cuda(), rmm::percent_of_free_device_memory(50)); } inline auto make_arena() diff --git a/tests/mr/device/pool_mr_tests.cpp b/tests/mr/device/pool_mr_tests.cpp index 2f32889d0..a2793386f 100644 --- a/tests/mr/device/pool_mr_tests.cpp +++ b/tests/mr/device/pool_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,9 +14,8 @@ * limitations under the License. */ +#include #include -#include -#include #include #include #include @@ -39,7 +38,7 @@ using limiting_mr = rmm::mr::limiting_resource_adaptor(static_cast(free) * 0.9), - rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + auto const ninety_percent_pool = rmm::percent_of_free_device_memory(90); pool_mr mr{rmm::mr::get_current_device_resource(), ninety_percent_pool}; }; EXPECT_NO_THROW(allocate_ninety()); @@ -83,9 +80,8 @@ TEST(PoolTest, AllocateNinetyPercent) TEST(PoolTest, TwoLargeBuffers) { auto two_large = []() { - auto const [free, total] = rmm::detail::available_device_memory(); - (void)total; - pool_mr mr{rmm::mr::get_current_device_resource()}; + [[maybe_unused]] auto const [free, total] = rmm::available_device_memory(); + pool_mr mr{rmm::mr::get_current_device_resource(), rmm::percent_of_free_device_memory(50)}; auto* ptr1 = mr.allocate(free / 4); auto* ptr2 = mr.allocate(free / 4); mr.deallocate(ptr1, free / 4); @@ -158,8 +154,8 @@ TEST(PoolTest, NonAlignedPoolSize) TEST(PoolTest, UpstreamDoesntSupportMemInfo) { cuda_mr cuda; - pool_mr mr1(&cuda); - pool_mr mr2(&mr1); + pool_mr mr1(&cuda, 0); + pool_mr mr2(&mr1, 0); auto* ptr = mr2.allocate(1024); mr2.deallocate(ptr, 1024); } diff --git a/tests/mr/host/mr_ref_tests.cpp 
b/tests/mr/host/mr_ref_tests.cpp index 6563eb635..416641f18 100644 --- a/tests/mr/host/mr_ref_tests.cpp +++ b/tests/mr/host/mr_ref_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,7 +16,7 @@ #include "../../byte_literals.hpp" -#include +#include #include #include #include @@ -35,7 +35,7 @@ namespace rmm::test { namespace { inline bool is_aligned(void* ptr, std::size_t alignment = alignof(std::max_align_t)) { - return rmm::detail::is_pointer_aligned(ptr, alignment); + return rmm::is_pointer_aligned(ptr, alignment); } // Returns true if a pointer points to a device memory or managed memory allocation. diff --git a/tests/mr/host/mr_tests.cpp b/tests/mr/host/mr_tests.cpp index 678d6aeb8..e0078c920 100644 --- a/tests/mr/host/mr_tests.cpp +++ b/tests/mr/host/mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,7 +16,7 @@ #include "../../byte_literals.hpp" -#include +#include #include #include #include @@ -35,7 +35,7 @@ namespace rmm::test { namespace { inline bool is_aligned(void* ptr, std::size_t alignment = alignof(std::max_align_t)) { - return rmm::detail::is_pointer_aligned(ptr, alignment); + return rmm::is_pointer_aligned(ptr, alignment); } // Returns true if a pointer points to a device memory or managed memory allocation. diff --git a/tests/mr/host/pinned_pool_mr_tests.cpp b/tests/mr/host/pinned_pool_mr_tests.cpp index dcdae37fa..d10b85e72 100644 --- a/tests/mr/host/pinned_pool_mr_tests.cpp +++ b/tests/mr/host/pinned_pool_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. 
+ * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,7 +16,6 @@ #include #include -#include #include #include #include @@ -33,7 +32,7 @@ using pool_mr = rmm::mr::pool_memory_resource; TEST(PinnedPoolTest, ThrowOnNullUpstream) { - auto construct_nullptr = []() { pool_mr mr{nullptr}; }; + auto construct_nullptr = []() { pool_mr mr{nullptr, 1024}; }; EXPECT_THROW(construct_nullptr(), rmm::logic_error); }