From 39edcd0e91f0d97a986803fb10815c089274f557 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 31 Aug 2023 05:36:59 +0000 Subject: [PATCH 01/11] Disable clang-tidy check for do while in macros --- .clang-tidy | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.clang-tidy b/.clang-tidy index 04689c330..9b3f844c9 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -64,4 +64,6 @@ CheckOptions: value: '1' - key: readability-magic-numbers.IgnorePowersOf2IntegerValues value: '1' + - key: cppcoreguidelines-avoid-do-while.IgnoreMacros + value: 'true' ... From 421007be66af3dd714d00d68a6c2eb0fa99ea43a Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 31 Aug 2023 06:33:01 +0000 Subject: [PATCH 02/11] Add new CUDA device utilities --- include/rmm/cuda_device.hpp | 49 +++++++++++++++++-- include/rmm/detail/dynamic_load_runtime.hpp | 7 ++- .../mr/device/cuda_async_memory_resource.hpp | 2 +- .../cuda_async_view_memory_resource.hpp | 2 +- include/rmm/mr/device/per_device_resource.hpp | 4 +- tests/mr/device/cuda_async_view_mr_tests.cpp | 4 +- 6 files changed, 53 insertions(+), 15 deletions(-) diff --git a/include/rmm/cuda_device.hpp b/include/rmm/cuda_device.hpp index ab225490e..e57d779d9 100644 --- a/include/rmm/cuda_device.hpp +++ b/include/rmm/cuda_device.hpp @@ -42,7 +42,6 @@ struct cuda_device_id { value_type id_; }; -namespace detail { /** * @brief Returns a `cuda_device_id` for the current device * @@ -50,11 +49,51 @@ namespace detail { * * @return `cuda_device_id` for the current device */ -inline cuda_device_id current_device() +inline cuda_device_id get_current_cuda_device() { - int dev_id{}; - RMM_CUDA_TRY(cudaGetDevice(&dev_id)); + cuda_device_id::value_type dev_id{}; + RMM_ASSERT_CUDA_SUCCESS(cudaGetDevice(&dev_id)); return cuda_device_id{dev_id}; } -} // namespace detail + +/** + * @brief Returns the number of CUDA devices in the system + * + * @return Number of CUDA devices in the system + */ +inline int get_num_cuda_devices() +{ + cuda_device_id::value_type num_dev{}; + RMM_ASSERT_CUDA_SUCCESS(cudaGetDeviceCount(&num_dev)); + return num_dev; +} + +/** + * @brief RAII class that sets the current CUDA device to the specified device on construction + * and restores the previous device on destruction. 
+ */ +struct cuda_set_device_raii { + /** + * @brief Construct a new cuda_set_device_raii object and sets the current CUDA device to `dev_id` + * + * @param dev_id The device to set as the current CUDA device + */ + explicit cuda_set_device_raii(cuda_device_id dev_id) : old_device_{get_current_cuda_device()} + { + RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(dev_id.value())); + } + /** + * @brief Reactivates the previous CUDA device + */ + ~cuda_set_device_raii() noexcept { RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(old_device_.value())); } + + cuda_set_device_raii(cuda_set_device_raii const&) = delete; + cuda_set_device_raii& operator=(cuda_set_device_raii const&) = delete; + cuda_set_device_raii(cuda_set_device_raii&&) = delete; + cuda_set_device_raii& operator=(cuda_set_device_raii&&) = delete; + + private: + cuda_device_id old_device_; +}; + } // namespace rmm diff --git a/include/rmm/detail/dynamic_load_runtime.hpp b/include/rmm/detail/dynamic_load_runtime.hpp index 28121e6a8..dc0a912e7 100644 --- a/include/rmm/detail/dynamic_load_runtime.hpp +++ b/include/rmm/detail/dynamic_load_runtime.hpp @@ -113,9 +113,8 @@ struct async_alloc { static auto driver_supports_pool{[] { int cuda_pool_supported{}; - auto result = cudaDeviceGetAttribute(&cuda_pool_supported, - cudaDevAttrMemoryPoolsSupported, - rmm::detail::current_device().value()); + auto result = cudaDeviceGetAttribute( + &cuda_pool_supported, cudaDevAttrMemoryPoolsSupported, rmm::get_current_cuda_device().value()); return result == cudaSuccess and cuda_pool_supported == 1; }()}; return runtime_supports_pool and driver_supports_pool; @@ -139,7 +138,7 @@ struct async_alloc { if (cudaMemHandleTypeNone != handle_type) { auto const result = cudaDeviceGetAttribute(&supported_handle_types_bitmask, cudaDevAttrMemoryPoolSupportedHandleTypes, - rmm::detail::current_device().value()); + rmm::get_current_cuda_device().value()); // Don't throw on cudaErrorInvalidValue auto const unsupported_runtime = (result == cudaErrorInvalidValue); diff --git a/include/rmm/mr/device/cuda_async_memory_resource.hpp b/include/rmm/mr/device/cuda_async_memory_resource.hpp index d41eae63e..329d8f29a 100644 --- a/include/rmm/mr/device/cuda_async_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_async_memory_resource.hpp @@ -98,7 +98,7 @@ class cuda_async_memory_resource final : public device_memory_resource { RMM_EXPECTS(rmm::detail::async_alloc::is_export_handle_type_supported(pool_props.handleTypes), "Requested IPC memory handle type not supported"); pool_props.location.type = cudaMemLocationTypeDevice; - pool_props.location.id = rmm::detail::current_device().value(); + pool_props.location.id = rmm::get_current_cuda_device().value(); cudaMemPool_t cuda_pool_handle{}; RMM_CUDA_TRY(rmm::detail::async_alloc::cudaMemPoolCreate(&cuda_pool_handle, &pool_props)); pool_ = cuda_async_view_memory_resource{cuda_pool_handle}; diff --git a/include/rmm/mr/device/cuda_async_view_memory_resource.hpp b/include/rmm/mr/device/cuda_async_view_memory_resource.hpp index 191e4741d..fc40baac3 100644 --- a/include/rmm/mr/device/cuda_async_view_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_async_view_memory_resource.hpp @@ -60,7 +60,7 @@ class cuda_async_view_memory_resource final : public device_memory_resource { }()} { // Check if cudaMallocAsync Memory pool supported - auto const device = rmm::detail::current_device(); + auto const device = rmm::get_current_cuda_device(); int cuda_pool_supported{}; auto result = cudaDeviceGetAttribute(&cuda_pool_supported, cudaDevAttrMemoryPoolsSupported, 
device.value()); diff --git a/include/rmm/mr/device/per_device_resource.hpp b/include/rmm/mr/device/per_device_resource.hpp index 4ddbd874a..f37f213a9 100644 --- a/include/rmm/mr/device/per_device_resource.hpp +++ b/include/rmm/mr/device/per_device_resource.hpp @@ -196,7 +196,7 @@ inline device_memory_resource* set_per_device_resource(cuda_device_id device_id, */ inline device_memory_resource* get_current_device_resource() { - return get_per_device_resource(rmm::detail::current_device()); + return get_per_device_resource(rmm::get_current_cuda_device()); } /** @@ -225,6 +225,6 @@ inline device_memory_resource* get_current_device_resource() */ inline device_memory_resource* set_current_device_resource(device_memory_resource* new_mr) { - return set_per_device_resource(rmm::detail::current_device(), new_mr); + return set_per_device_resource(rmm::get_current_cuda_device(), new_mr); } } // namespace rmm::mr diff --git a/tests/mr/device/cuda_async_view_mr_tests.cpp b/tests/mr/device/cuda_async_view_mr_tests.cpp index 86cb6f106..209429b4b 100644 --- a/tests/mr/device/cuda_async_view_mr_tests.cpp +++ b/tests/mr/device/cuda_async_view_mr_tests.cpp @@ -31,7 +31,7 @@ TEST(PoolTest, UsePool) { cudaMemPool_t memPool{}; RMM_CUDA_TRY(rmm::detail::async_alloc::cudaDeviceGetDefaultMemPool( - &memPool, rmm::detail::current_device().value())); + &memPool, rmm::get_current_cuda_device().value())); const auto pool_init_size{100}; cuda_async_view_mr mr{memPool}; @@ -44,7 +44,7 @@ TEST(PoolTest, NotTakingOwnershipOfPool) { cudaMemPoolProps poolProps = {}; poolProps.allocType = cudaMemAllocationTypePinned; - poolProps.location.id = rmm::detail::current_device().value(); + poolProps.location.id = rmm::get_current_cuda_device().value(); poolProps.location.type = cudaMemLocationTypeDevice; cudaMemPool_t memPool{}; From 429c71d5dfa475cee28b4d3ebcd726102ffbdbf5 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 31 Aug 2023 06:33:27 +0000 Subject: [PATCH 03/11] Add failing (in debug build) multidevice pool test --- tests/mr/device/pool_mr_tests.cpp | 39 +++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/tests/mr/device/pool_mr_tests.cpp b/tests/mr/device/pool_mr_tests.cpp index c5df1951c..a43088c4e 100644 --- a/tests/mr/device/pool_mr_tests.cpp +++ b/tests/mr/device/pool_mr_tests.cpp @@ -14,10 +14,12 @@ * limitations under the License. 
*/ +#include "rmm/cuda_device.hpp" #include #include #include #include +#include #include #include #include @@ -150,5 +152,42 @@ TEST(PoolTest, UpstreamDoesntSupportMemInfo) mr2.deallocate(ptr, 1024); } +TEST(PoolTest, MultidevicePool) +{ + using MemoryResource = rmm::mr::pool_memory_resource; + + // Get the number of cuda devices + int num_devices = rmm::get_num_cuda_devices(); + + // only run on multidevice systems + if (num_devices >= 2) { + rmm::mr::cuda_memory_resource general_mr; + + // initializing pool_memory_resource of multiple devices + int devices = 2; + size_t pool_size = 1024; + std::vector> mrs; + + for (int i = 0; i < devices; ++i) { + RMM_CUDA_TRY(cudaSetDevice(i)); + auto mr = std::make_shared(&general_mr, pool_size, pool_size); + rmm::mr::set_per_device_resource(rmm::cuda_device_id{i}, mr.get()); + mrs.emplace_back(mr); + } + + { + RMM_CUDA_TRY(cudaSetDevice(0)); + rmm::device_buffer buf_a(16, rmm::cuda_stream_per_thread, mrs[0].get()); + + { + RMM_CUDA_TRY(cudaSetDevice(1)); + rmm::device_buffer buf_b(16, rmm::cuda_stream_per_thread, mrs[1].get()); + } + + RMM_CUDA_TRY(cudaSetDevice(0)); + } + } +} + } // namespace } // namespace rmm::test From f01d797cd0aa2d0d1c6dd5662dbdc99da8b59e24 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 31 Aug 2023 07:12:39 +0000 Subject: [PATCH 04/11] Store per-device TLS event wrappers instead of a single event wrapper for each thread. --- .../detail/stream_ordered_memory_resource.hpp | 36 ++++++++++++------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp index 53575e5ce..5f044b537 100644 --- a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp +++ b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp @@ -15,15 +15,16 @@ */ #pragma once +#include #include #include #include #include -#include - #include +#include + #include #include #include @@ -288,17 +289,26 @@ class stream_ordered_memory_resource : public crtp, public device_ stream_event_pair get_event(cuda_stream_view stream) { if (stream.is_per_thread_default()) { - // Create a thread-local shared event wrapper. Shared pointers in the thread and in each MR - // instance ensures it is destroyed cleaned up only after all are finished with it. - thread_local auto event_tls = std::make_shared(); - default_stream_events.insert(event_tls); - return stream_event_pair{stream.value(), event_tls->event}; + // Create a thread-local shared event wrapper for each device. Shared pointers in the thread + // and in each MR instance ensure the wrappers are destroyed only after all are finished + // with them. + thread_local std::vector> events_tls( + rmm::get_num_cuda_devices()); + auto event = [&, device_id = this->device_id_]() { + if (events_tls[device_id.value()]) { return events_tls[device_id.value()]->event; } + + auto event = std::make_shared(); + events_tls[device_id.value()] = event; + this->default_stream_events.insert(event); + return event->event; + }(); + return stream_event_pair{stream.value(), event}; } // We use cudaStreamLegacy as the event map key for the default stream for consistency between // PTDS and non-PTDS mode. In PTDS mode, the cudaStreamLegacy map key will only exist if the // user explicitly passes it, so it is used as the default location for the free list - // at construction. For consistency, the same key is used for null stream free lists in non-PTDS - // mode. + // at construction. 
For consistency, the same key is used for null stream free lists in + // non-PTDS mode. // NOLINTNEXTLINE(cppcoreguidelines-pro-type-cstyle-cast) auto* const stream_to_store = stream.is_default() ? cudaStreamLegacy : stream.value(); @@ -496,11 +506,13 @@ class stream_ordered_memory_resource : public crtp, public device_ // bidirectional mapping between non-default streams and events std::unordered_map stream_events_; - // shared pointers to events keeps the events alive as long as either the thread that created them - // or the MR that is using them exists. + // shared pointers to events keeps the events alive as long as either the thread that created + // them or the MR that is using them exists. std::set> default_stream_events; std::mutex mtx_; // mutex for thread-safe access -}; // namespace detail + + rmm::cuda_device_id device_id_{rmm::get_current_cuda_device()}; +}; // namespace detail } // namespace rmm::mr::detail From 79a298d7b1542b25cd46a231d8b25ea43a8050c5 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 31 Aug 2023 07:38:22 +0000 Subject: [PATCH 05/11] style --- include/rmm/detail/dynamic_load_runtime.hpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/rmm/detail/dynamic_load_runtime.hpp b/include/rmm/detail/dynamic_load_runtime.hpp index dc0a912e7..b45dbae25 100644 --- a/include/rmm/detail/dynamic_load_runtime.hpp +++ b/include/rmm/detail/dynamic_load_runtime.hpp @@ -113,8 +113,9 @@ struct async_alloc { static auto driver_supports_pool{[] { int cuda_pool_supported{}; - auto result = cudaDeviceGetAttribute( - &cuda_pool_supported, cudaDevAttrMemoryPoolsSupported, rmm::get_current_cuda_device().value()); + auto result = cudaDeviceGetAttribute(&cuda_pool_supported, + cudaDevAttrMemoryPoolsSupported, + rmm::get_current_cuda_device().value()); return result == cudaSuccess and cuda_pool_supported == 1; }()}; return runtime_supports_pool and driver_supports_pool; From d27f57468e4e78791f2bfc575cacc48a17c7d421 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 5 Sep 2023 02:35:11 +0000 Subject: [PATCH 06/11] Add compile_commands.json to .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 1ab57e4d4..ad6c8ebf7 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,7 @@ DartConfiguration.tcl .DS_Store *.manifest *.spec +compile_commands.json ## Python build directories & artifacts dist/ From f299c032bb48cb0af7b6d3ee37f2420f0e33b181 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 5 Sep 2023 02:35:27 +0000 Subject: [PATCH 07/11] Header in angle brackets --- tests/mr/device/pool_mr_tests.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/mr/device/pool_mr_tests.cpp b/tests/mr/device/pool_mr_tests.cpp index a43088c4e..4a234d2f9 100644 --- a/tests/mr/device/pool_mr_tests.cpp +++ b/tests/mr/device/pool_mr_tests.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "rmm/cuda_device.hpp" +#include #include #include #include @@ -102,7 +102,7 @@ TEST(PoolTest, ForceGrowth) EXPECT_NO_THROW(mr.allocate(1000)); EXPECT_THROW(mr.allocate(4000), rmm::out_of_memory); // too much EXPECT_NO_THROW(mr.allocate(500)); - EXPECT_NO_THROW(mr.allocate(2000)); // fits + EXPECT_NO_THROW(mr.allocate(2000)); // fits } } From 28c094d0804d0d2fb01552477d879d60d203442f Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 6 Sep 2023 00:21:07 +0000 Subject: [PATCH 08/11] style --- tests/mr/device/pool_mr_tests.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/mr/device/pool_mr_tests.cpp b/tests/mr/device/pool_mr_tests.cpp index 4a234d2f9..6bcd90527 100644 --- a/tests/mr/device/pool_mr_tests.cpp +++ b/tests/mr/device/pool_mr_tests.cpp @@ -102,7 +102,7 @@ TEST(PoolTest, ForceGrowth) EXPECT_NO_THROW(mr.allocate(1000)); EXPECT_THROW(mr.allocate(4000), rmm::out_of_memory); // too much EXPECT_NO_THROW(mr.allocate(500)); - EXPECT_NO_THROW(mr.allocate(2000)); // fits + EXPECT_NO_THROW(mr.allocate(2000)); // fits } } From bbe46bd445090e349c9f5040652e8e7b8868baff Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Wed, 6 Sep 2023 23:44:51 +0000 Subject: [PATCH 09/11] Add doc and example about dtors under Multiple Devices --- README.md | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 8d65b7d33..0166ea09b 100644 --- a/README.md +++ b/README.md @@ -354,11 +354,32 @@ objects for each device and sets them as the per-device resource for that device ```c++ std::vector> per_device_pools; for(int i = 0; i < N; ++i) { - cudaSetDevice(i); // set device i before creating MR - // Use a vector of unique_ptr to maintain the lifetime of the MRs - per_device_pools.push_back(std::make_unique()); - // Set the per-device resource for device i - set_per_device_resource(cuda_device_id{i}, &per_device_pools.back()); + cudaSetDevice(i); // set device i before creating MR + // Use a vector of unique_ptr to maintain the lifetime of the MRs + per_device_pools.push_back(std::make_unique()); + // Set the per-device resource for device i + set_per_device_resource(cuda_device_id{i}, &per_device_pools.back()); +} +``` + +Note that the CUDA device that is current when creating a `device_memory_resource` must also be +current any time that `device_memory_resource` is used to deallocate memory, including in a +destructor. This affects RAII classes like `rmm::device_buffer` and `rmm::device_uvector`. Here's an +(incorrect) example that assumes the above example loop has been run to create a +`pool_memory_resource` for each device. A correct example adds a call to `cudaSetDevice(1)` on the +line of the error comment. + +```c++ +{ + RMM_CUDA_TRY(cudaSetDevice(0)); + rmm::device_buffer buf_a(16); + + { + RMM_CUDA_TRY(cudaSetDevice(1)); + rmm::device_buffer buf_b(16); + } + + // Error: when buf_a is destroyed, the current device must be 0, but it is 1 } ``` From 397336b64fd263574cb68d8b0e198310d307fbdf Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Wed, 13 Sep 2023 09:39:21 +1000 Subject: [PATCH 10/11] Fix device ID typo. 
Co-authored-by: Bradley Dice --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0166ea09b..2fa4761df 100644 --- a/README.md +++ b/README.md @@ -366,7 +366,7 @@ Note that the CUDA device that is current when creating a `device_memory_resourc current any time that `device_memory_resource` is used to deallocate memory, including in a destructor. This affects RAII classes like `rmm::device_buffer` and `rmm::device_uvector`. Here's an (incorrect) example that assumes the above example loop has been run to create a -`pool_memory_resource` for each device. A correct example adds a call to `cudaSetDevice(1)` on the +`pool_memory_resource` for each device. A correct example adds a call to `cudaSetDevice(0)` on the line of the error comment. ```c++ From 44c89cc8e0e099b4626f90498191bcd7325ea389 Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Wed, 13 Sep 2023 00:25:03 +0000 Subject: [PATCH 11/11] Eliminate extraneous shared pointer copy and be smarter about cudaSetDevice in RAII class. --- include/rmm/cuda_device.hpp | 15 ++++++++++----- .../detail/stream_ordered_memory_resource.hpp | 5 ++--- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/include/rmm/cuda_device.hpp b/include/rmm/cuda_device.hpp index cbed4de1a..8d355ee23 100644 --- a/include/rmm/cuda_device.hpp +++ b/include/rmm/cuda_device.hpp @@ -51,7 +51,7 @@ struct cuda_device_id { */ inline cuda_device_id get_current_cuda_device() { - cuda_device_id::value_type dev_id{}; + cuda_device_id::value_type dev_id{-1}; RMM_ASSERT_CUDA_SUCCESS(cudaGetDevice(&dev_id)); return cuda_device_id{dev_id}; } @@ -63,7 +63,7 @@ inline cuda_device_id get_current_cuda_device() */ inline int get_num_cuda_devices() { - cuda_device_id::value_type num_dev{}; + cuda_device_id::value_type num_dev{-1}; RMM_ASSERT_CUDA_SUCCESS(cudaGetDeviceCount(&num_dev)); return num_dev; } @@ -78,14 +78,18 @@ struct cuda_set_device_raii { * * @param dev_id The device to set as the current CUDA device */ - explicit cuda_set_device_raii(cuda_device_id dev_id) : old_device_{get_current_cuda_device()} + explicit cuda_set_device_raii(cuda_device_id dev_id) + : old_device_{get_current_cuda_device()}, needs_reset_{old_device_.value() != dev_id.value()} { - RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(dev_id.value())); + if (needs_reset_) RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(dev_id.value())); } /** * @brief Reactivates the previous CUDA device */ - ~cuda_set_device_raii() noexcept { RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(old_device_.value())); } + ~cuda_set_device_raii() noexcept + { + if (needs_reset_) RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(old_device_.value())); + } cuda_set_device_raii(cuda_set_device_raii const&) = delete; cuda_set_device_raii& operator=(cuda_set_device_raii const&) = delete; @@ -94,6 +98,7 @@ struct cuda_set_device_raii { private: cuda_device_id old_device_; + bool needs_reset_; }; } // namespace rmm diff --git a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp index 5f044b537..f071717c0 100644 --- a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp +++ b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp @@ -297,10 +297,9 @@ class stream_ordered_memory_resource : public crtp, public device_ auto event = [&, device_id = this->device_id_]() { if (events_tls[device_id.value()]) { return events_tls[device_id.value()]->event; } - auto event = std::make_shared(); - 
events_tls[device_id.value()] = event;
+        auto event = std::make_shared<event_wrapper>();
         this->default_stream_events.insert(event);
-        return event->event;
+        return (events_tls[device_id.value()] = std::move(event))->event;
       }();
       return stream_event_pair{stream.value(), event};
     }
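
For reference, a minimal sketch of how the `cuda_set_device_raii` helper and the per-device pool resources introduced in this patch series might be used together. Only `cuda_set_device_raii`, `cuda_device_id`, `get_num_cuda_devices`, `set_per_device_resource`, and `pool_memory_resource` come from the patches above; the headers, pool sizes, and `main` scaffolding are illustrative assumptions, not part of the patches.

```c++
// Illustrative sketch (assumed scaffolding): one pool per device, each created
// and used while its device is current via the new cuda_set_device_raii guard.
#include <rmm/cuda_device.hpp>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

#include <memory>
#include <vector>

int main()
{
  using pool_mr = rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource>;

  rmm::mr::cuda_memory_resource upstream;
  std::vector<std::shared_ptr<pool_mr>> pools;

  // Create a pool per device and register it as that device's default resource.
  for (int dev = 0; dev < rmm::get_num_cuda_devices(); ++dev) {
    rmm::cuda_set_device_raii set_dev{rmm::cuda_device_id{dev}};  // device `dev` is current here
    pools.push_back(std::make_shared<pool_mr>(&upstream, 1u << 20, 1u << 20));  // sizes are arbitrary
    rmm::mr::set_per_device_resource(rmm::cuda_device_id{dev}, pools.back().get());
  }

  {
    // The guard keeps device 0 current for the whole scope, so buf is both
    // allocated and deallocated (in its destructor) on device 0's pool.
    rmm::cuda_set_device_raii set_dev0{rmm::cuda_device_id{0}};
    rmm::device_buffer buf(16, rmm::cuda_stream_per_thread);
  }

  return 0;
}
```

Scoping the RAII guard around each `device_buffer` keeps the creating device current when the buffer's destructor runs, which is exactly the requirement documented in the README hunk added by patch 09.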