From f57eee424bd3608e732023aa77a59a9d401bfcc0 Mon Sep 17 00:00:00 2001 From: Andrew Woloszyn Date: Tue, 18 Feb 2025 15:30:17 -0500 Subject: [PATCH 1/2] [hip] Move calls to hipFree and similar to the cleanup thread. This keeps them off the main thread since they can block for an unexpectedly large amount of time. Signed-off-by: Andrew Woloszyn ---
Review notes (free text after the separator; not part of the commit message):
  * cleanup_thread.c: the cleanup thread now calls hipEventSynchronize only
    when the status is OK and callback.event is non-NULL.
  * hip_device.c / hip_device.h: adds
    iree_hal_hip_device_add_asynchronous_cleanup(), which casts the base
    device and forwards the callback and user_data to
    iree_hal_hip_cleanup_thread_add_cleanup() with a NULL event.
  * hip_allocator.c: iree_hal_hip_buffer_release_callback() no longer frees
    inline. It copies the allocator, buffer type and device/host pointers
    (plus memory type and allocation size under IREE_STATISTICS) into a
    heap-allocated iree_hal_hip_release_async_data_t and queues
    iree_hal_hip_buffer_release_callback_async(). That callback calls
    iree_hal_hip_buffer_free(), records the trace/statistics free for the
    DEVICE and HOST buffer types, frees the record and returns the status it
    was given; it does not read its event argument.
    alloc_async/free_async additionally gain a trace zone (z0).
  NOTE(review): if iree_allocator_malloc() fails, nothing is queued and the
    status is ignored, so iree_hal_hip_buffer_free() is never called for that
    buffer (the removed code always called it). Consider releasing
    synchronously in that case.
  NOTE(review): a failure from iree_hal_hip_device_add_asynchronous_cleanup()
    is also ignored; whether the record and the buffer memory are still
    released then is not visible in this patch - confirm against the cleanup
    thread's contract.
  NOTE(review): each IREE_TRACE_ZONE_BEGIN(z0) added to alloc_async and
    free_async is closed by a single IREE_TRACE_ZONE_END(z0) before the final
    return; confirm there is no early return between them in the lines these
    hunks do not show.
.../src/iree/hal/drivers/hip/cleanup_thread.c | 4 +- .../src/iree/hal/drivers/hip/hip_allocator.c | 71 +++++++++++++++---- runtime/src/iree/hal/drivers/hip/hip_device.c | 8 +++ runtime/src/iree/hal/drivers/hip/hip_device.h | 5 ++ 4 files changed, 72 insertions(+), 16 deletions(-) diff --git a/runtime/src/iree/hal/drivers/hip/cleanup_thread.c b/runtime/src/iree/hal/drivers/hip/cleanup_thread.c index 95c7e206b7b9..0e845214c4e6 100644 --- a/runtime/src/iree/hal/drivers/hip/cleanup_thread.c +++ b/runtime/src/iree/hal/drivers/hip/cleanup_thread.c @@ -63,7 +63,9 @@ static int iree_hal_hip_cleanup_thread_main(void* param) { iree_hal_hip_callback_queue_pop_front(&thread->queue, 1); iree_slim_mutex_unlock(&thread->mutex); - if (iree_status_is_ok(status)) { + // If we have a null event then we don't have to wait + // on the GPU to synchronize. 
+ if (iree_status_is_ok(status) && callback.event) { status = IREE_HIP_CALL_TO_STATUS( thread->symbols, hipEventSynchronize(iree_hal_hip_event_handle(callback.event))); diff --git a/runtime/src/iree/hal/drivers/hip/hip_allocator.c b/runtime/src/iree/hal/drivers/hip/hip_allocator.c index 3460f1ccc35c..ecdc21386205 100644 --- a/runtime/src/iree/hal/drivers/hip/hip_allocator.c +++ b/runtime/src/iree/hal/drivers/hip/hip_allocator.c @@ -13,6 +13,7 @@ #include "iree/base/tracing.h" #include "iree/hal/drivers/hip/dynamic_symbols.h" #include "iree/hal/drivers/hip/hip_buffer.h" +#include "iree/hal/drivers/hip/hip_device.h" #include "iree/hal/drivers/hip/per_device_information.h" #include "iree/hal/drivers/hip/status_util.h" #include "iree/hal/drivers/hip/util/queue.h" @@ -569,31 +570,66 @@ static void iree_hal_hip_allocator_deallocate_buffer( iree_hal_buffer_destroy(base_buffer); } -static void iree_hal_hip_buffer_release_callback(void* user_data, - iree_hal_buffer_t* buffer) { - iree_hal_hip_allocator_t* allocator = (iree_hal_hip_allocator_t*)user_data; - - const iree_hal_hip_buffer_type_t buffer_type = - iree_hal_hip_buffer_type(buffer); - - iree_hal_hip_buffer_free(allocator->symbols, buffer_type, - iree_hal_hip_buffer_device_pointer(buffer), - iree_hal_hip_buffer_host_pointer(buffer)); - - switch (buffer_type) { +typedef struct iree_hal_hip_release_async_data_t { + iree_hal_hip_allocator_t* allocator; + iree_hal_hip_buffer_type_t buffer_type; + hipDeviceptr_t device_pointer; + void* host_pointer; + IREE_STATISTICS(iree_hal_memory_type_t memory_type; + iree_device_size_t allocation_size;) +} iree_hal_hip_release_async_data_t; + +static iree_status_t iree_hal_hip_buffer_release_callback_async( + void* user_data, iree_hal_hip_event_t* event, iree_status_t status) { + iree_hal_hip_release_async_data_t* async_data = + (iree_hal_hip_release_async_data_t*)user_data; + + iree_hal_hip_buffer_free(async_data->allocator->symbols, + async_data->buffer_type, 
async_data->device_pointer, + async_data->host_pointer); + + switch (async_data->buffer_type) { case IREE_HAL_HIP_BUFFER_TYPE_DEVICE: case IREE_HAL_HIP_BUFFER_TYPE_HOST: { IREE_TRACE_FREE_NAMED(IREE_HAL_HIP_ALLOCATOR_ID, - (void*)iree_hal_hip_buffer_device_pointer(buffer)); + (void*)async_data->device_pointer); IREE_STATISTICS(iree_hal_allocator_statistics_record_free( - &allocator->statistics, iree_hal_buffer_memory_type(buffer), - iree_hal_buffer_allocation_size(buffer))); + &async_data->allocator->statistics, async_data->memory_type, + async_data->allocation_size)); break; } default: // Buffer type not tracked. break; } + iree_allocator_free(async_data->allocator->host_allocator, async_data); + return status; +} + +static void iree_hal_hip_buffer_release_callback(void* user_data, + iree_hal_buffer_t* buffer) { + iree_hal_hip_allocator_t* allocator = (iree_hal_hip_allocator_t*)user_data; + + iree_hal_hip_release_async_data_t* release_async_data = NULL; + + iree_status_t status = iree_allocator_malloc(allocator->host_allocator, + sizeof(*release_async_data), + (void**)&release_async_data); + if (iree_status_is_ok(status)) { + release_async_data->allocator = allocator; + release_async_data->device_pointer = + iree_hal_hip_buffer_device_pointer(buffer); + release_async_data->host_pointer = iree_hal_hip_buffer_host_pointer(buffer); + release_async_data->buffer_type = iree_hal_hip_buffer_type(buffer); + IREE_STATISTICS(release_async_data->memory_type = + iree_hal_buffer_memory_type(buffer); + release_async_data->allocation_size = + iree_hal_buffer_allocation_size(buffer);) + status = iree_hal_hip_device_add_asynchronous_cleanup( + allocator->parent_device, &iree_hal_hip_buffer_release_callback_async, + (void*)release_async_data); + } + iree_status_ignore(status); } static iree_status_t iree_hal_hip_allocator_import_buffer( @@ -766,6 +802,7 @@ iree_status_t iree_hal_hip_allocator_alloc_async( iree_hal_buffer_allocation_size(buffer), IREE_HOST_SIZE_MAX); } + 
IREE_TRACE_ZONE_BEGIN(z0); int device_ordinal = 0; device_ordinal = @@ -837,6 +874,8 @@ iree_status_t iree_hal_hip_allocator_alloc_async( iree_hal_hip_buffer_set_allocation_empty(buffer); } + IREE_TRACE_ZONE_END(z0); + return status; } @@ -849,6 +888,7 @@ iree_status_t iree_hal_hip_allocator_free_async( return iree_ok_status(); } + IREE_TRACE_ZONE_BEGIN(z0); IREE_TRACE_FREE_NAMED(IREE_HAL_HIP_ALLOCATOR_ID, (void*)device_ptr); IREE_STATISTICS(iree_hal_allocator_statistics_record_free( &allocator->statistics, iree_hal_buffer_memory_type(buffer), @@ -888,6 +928,7 @@ iree_status_t iree_hal_hip_allocator_free_async( if (iree_status_is_ok(status)) { iree_hal_hip_buffer_set_allocation_empty(buffer); } + IREE_TRACE_ZONE_END(z0); return status; } diff --git a/runtime/src/iree/hal/drivers/hip/hip_device.c b/runtime/src/iree/hal/drivers/hip/hip_device.c index 8e944d6ab3c4..10e6bd7210c3 100644 --- a/runtime/src/iree/hal/drivers/hip/hip_device.c +++ b/runtime/src/iree/hal/drivers/hip/hip_device.c @@ -1108,6 +1108,14 @@ static iree_status_t iree_hal_hip_device_stream_add_cleanup( return status; } +iree_status_t iree_hal_hip_device_add_asynchronous_cleanup( + iree_hal_device_t* base_device, iree_hal_hip_cleanup_callback_t callback, + void* user_data) { + iree_hal_hip_device_t* device = iree_hal_hip_device_cast(base_device); + return iree_hal_hip_cleanup_thread_add_cleanup(device->cleanup_thread, NULL, + callback, user_data); +} + static iree_status_t iree_hal_hip_device_stream_signal_semaphores_and_add_cleanup( iree_hal_hip_device_t* device, iree_hal_hip_cleanup_thread_t* thread, diff --git a/runtime/src/iree/hal/drivers/hip/hip_device.h b/runtime/src/iree/hal/drivers/hip/hip_device.h index f8fc144d90f5..4d0ebb51afe4 100644 --- a/runtime/src/iree/hal/drivers/hip/hip_device.h +++ b/runtime/src/iree/hal/drivers/hip/hip_device.h @@ -12,6 +12,7 @@ #include "iree/base/api.h" #include "iree/hal/api.h" #include "iree/hal/drivers/hip/api.h" +#include 
"iree/hal/drivers/hip/cleanup_thread.h" #include "iree/hal/drivers/hip/dynamic_symbols.h" #include "iree/hal/drivers/hip/rccl_dynamic_symbols.h" @@ -49,4 +50,8 @@ static inline hipDeviceptr_t iree_hal_hip_device_size_to_hip_device_prt( return (hipDeviceptr_t)p; } +iree_status_t iree_hal_hip_device_add_asynchronous_cleanup( + iree_hal_device_t* base_device, iree_hal_hip_cleanup_callback_t callback, + void* user_data); + #endif // IREE_HAL_DRIVERS_HIP_DEVICE_H_ From 40ba0bafb307fd6959d499b6eab4d63e44ad7969 Mon Sep 17 00:00:00 2001 From: Andrew Woloszyn Date: Tue, 18 Feb 2025 15:51:20 -0500 Subject: [PATCH 2/2] Fixed formatting Signed-off-by: Andrew Woloszyn --- runtime/src/iree/hal/drivers/hip/hip_allocator.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/runtime/src/iree/hal/drivers/hip/hip_allocator.c b/runtime/src/iree/hal/drivers/hip/hip_allocator.c index ecdc21386205..1429ba2cbf50 100644 --- a/runtime/src/iree/hal/drivers/hip/hip_allocator.c +++ b/runtime/src/iree/hal/drivers/hip/hip_allocator.c @@ -621,10 +621,11 @@ static void iree_hal_hip_buffer_release_callback(void* user_data, iree_hal_hip_buffer_device_pointer(buffer); release_async_data->host_pointer = iree_hal_hip_buffer_host_pointer(buffer); release_async_data->buffer_type = iree_hal_hip_buffer_type(buffer); - IREE_STATISTICS(release_async_data->memory_type = - iree_hal_buffer_memory_type(buffer); - release_async_data->allocation_size = - iree_hal_buffer_allocation_size(buffer);) + IREE_STATISTICS({ + release_async_data->memory_type = iree_hal_buffer_memory_type(buffer); + release_async_data->allocation_size = + iree_hal_buffer_allocation_size(buffer); + }) status = iree_hal_hip_device_add_asynchronous_cleanup( allocator->parent_device, &iree_hal_hip_buffer_release_callback_async, (void*)release_async_data);