Skip to content

Commit

Permalink
[hip] Move calls to hipFree and similar to the cleanup thread. (#20020)
Browse files Browse the repository at this point in the history
This keeps them off the main thread since they can block for an
unexpectedly large amount of time.

---------

Signed-off-by: Andrew Woloszyn <[email protected]>
  • Loading branch information
AWoloszyn authored Feb 19, 2025
1 parent b85c180 commit 6c07258
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 16 deletions.
4 changes: 3 additions & 1 deletion runtime/src/iree/hal/drivers/hip/cleanup_thread.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,9 @@ static int iree_hal_hip_cleanup_thread_main(void* param) {
iree_hal_hip_callback_queue_pop_front(&thread->queue, 1);
iree_slim_mutex_unlock(&thread->mutex);

if (iree_status_is_ok(status)) {
// If we have a null event then we don't have to wait
// on the GPU to synchronize.
if (iree_status_is_ok(status) && callback.event) {
status = IREE_HIP_CALL_TO_STATUS(
thread->symbols,
hipEventSynchronize(iree_hal_hip_event_handle(callback.event)));
Expand Down
72 changes: 57 additions & 15 deletions runtime/src/iree/hal/drivers/hip/hip_allocator.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "iree/base/tracing.h"
#include "iree/hal/drivers/hip/dynamic_symbols.h"
#include "iree/hal/drivers/hip/hip_buffer.h"
#include "iree/hal/drivers/hip/hip_device.h"
#include "iree/hal/drivers/hip/per_device_information.h"
#include "iree/hal/drivers/hip/status_util.h"
#include "iree/hal/drivers/hip/util/queue.h"
Expand Down Expand Up @@ -569,31 +570,67 @@ static void iree_hal_hip_allocator_deallocate_buffer(
iree_hal_buffer_destroy(base_buffer);
}

static void iree_hal_hip_buffer_release_callback(void* user_data,
iree_hal_buffer_t* buffer) {
iree_hal_hip_allocator_t* allocator = (iree_hal_hip_allocator_t*)user_data;

const iree_hal_hip_buffer_type_t buffer_type =
iree_hal_hip_buffer_type(buffer);

iree_hal_hip_buffer_free(allocator->symbols, buffer_type,
iree_hal_hip_buffer_device_pointer(buffer),
iree_hal_hip_buffer_host_pointer(buffer));

switch (buffer_type) {
// Snapshot of the buffer properties needed to free its memory from
// iree_hal_hip_buffer_release_callback_async. Instances are allocated from the
// owning allocator's host allocator in iree_hal_hip_buffer_release_callback
// and freed by the async callback once the memory has been released.
typedef struct iree_hal_hip_release_async_data_t {
// Allocator that provides the HIP symbols, statistics, and host allocator
// used while freeing.
iree_hal_hip_allocator_t* allocator;
// Buffer type captured from iree_hal_hip_buffer_type(); passed to
// iree_hal_hip_buffer_free and used to decide whether the free is tracked.
iree_hal_hip_buffer_type_t buffer_type;
// Device pointer captured from iree_hal_hip_buffer_device_pointer().
hipDeviceptr_t device_pointer;
// Host pointer captured from iree_hal_hip_buffer_host_pointer().
void* host_pointer;
// Memory type and allocation size captured from the buffer for
// iree_hal_allocator_statistics_record_free; declared through the
// IREE_STATISTICS macro.
IREE_STATISTICS(iree_hal_memory_type_t memory_type;
iree_device_size_t allocation_size;)
} iree_hal_hip_release_async_data_t;

// Cleanup callback that frees the memory described by |user_data|.
//
// |user_data| must be an iree_hal_hip_release_async_data_t allocated from the
// allocator's host allocator; it is consumed and freed here.
// |event| is not used by this callback.
// |status| is returned unchanged after the memory has been released; the
// memory is freed regardless of its value.
static iree_status_t iree_hal_hip_buffer_release_callback_async(
    void* user_data, iree_hal_hip_event_t* event, iree_status_t status) {
  iree_hal_hip_release_async_data_t* async_data =
      (iree_hal_hip_release_async_data_t*)user_data;

  iree_hal_hip_buffer_free(async_data->allocator->symbols,
                           async_data->buffer_type, async_data->device_pointer,
                           async_data->host_pointer);

  // Only device and host buffer types are reported to tracing/statistics.
  // All values come from the captured snapshot: no buffer object is available
  // in this callback.
  switch (async_data->buffer_type) {
    case IREE_HAL_HIP_BUFFER_TYPE_DEVICE:
    case IREE_HAL_HIP_BUFFER_TYPE_HOST: {
      IREE_TRACE_FREE_NAMED(IREE_HAL_HIP_ALLOCATOR_ID,
                            (void*)async_data->device_pointer);
      IREE_STATISTICS(iree_hal_allocator_statistics_record_free(
          &async_data->allocator->statistics, async_data->memory_type,
          async_data->allocation_size));
      break;
    }
    default:
      // Buffer type not tracked.
      break;
  }

  // Release the request itself last; it is still read above.
  iree_allocator_free(async_data->allocator->host_allocator, async_data);
  return status;
}

// Buffer release callback: frees the HIP memory backing |buffer|.
//
// |user_data| is the iree_hal_hip_allocator_t that owns the buffer.
//
// The free is normally deferred: the buffer's pointers and type (and, when
// statistics are enabled, memory type and allocation size) are copied into a
// heap-allocated request that is handed to the device's asynchronous cleanup
// via iree_hal_hip_device_add_asynchronous_cleanup, with
// iree_hal_hip_buffer_release_callback_async as the callback.
//
// If the request cannot be allocated the memory is freed synchronously here
// instead, so that the buffer's memory is not leaked.
static void iree_hal_hip_buffer_release_callback(void* user_data,
                                                 iree_hal_buffer_t* buffer) {
  iree_hal_hip_allocator_t* allocator = (iree_hal_hip_allocator_t*)user_data;

  iree_hal_hip_release_async_data_t* release_async_data = NULL;
  iree_status_t status = iree_allocator_malloc(allocator->host_allocator,
                                               sizeof(*release_async_data),
                                               (void**)&release_async_data);
  if (!iree_status_is_ok(status)) {
    // Could not allocate the deferred request. This callback cannot report
    // errors, so drop the status and free inline rather than leaking.
    iree_status_ignore(status);

    const iree_hal_hip_buffer_type_t buffer_type =
        iree_hal_hip_buffer_type(buffer);
    iree_hal_hip_buffer_free(allocator->symbols, buffer_type,
                             iree_hal_hip_buffer_device_pointer(buffer),
                             iree_hal_hip_buffer_host_pointer(buffer));
    switch (buffer_type) {
      case IREE_HAL_HIP_BUFFER_TYPE_DEVICE:
      case IREE_HAL_HIP_BUFFER_TYPE_HOST: {
        IREE_TRACE_FREE_NAMED(
            IREE_HAL_HIP_ALLOCATOR_ID,
            (void*)iree_hal_hip_buffer_device_pointer(buffer));
        IREE_STATISTICS(iree_hal_allocator_statistics_record_free(
            &allocator->statistics, iree_hal_buffer_memory_type(buffer),
            iree_hal_buffer_allocation_size(buffer)));
        break;
      }
      default:
        // Buffer type not tracked.
        break;
    }
    return;
  }

  // Capture everything the deferred callback needs; it does not receive
  // |buffer|.
  release_async_data->allocator = allocator;
  release_async_data->device_pointer =
      iree_hal_hip_buffer_device_pointer(buffer);
  release_async_data->host_pointer = iree_hal_hip_buffer_host_pointer(buffer);
  release_async_data->buffer_type = iree_hal_hip_buffer_type(buffer);
  IREE_STATISTICS({
    release_async_data->memory_type = iree_hal_buffer_memory_type(buffer);
    release_async_data->allocation_size =
        iree_hal_buffer_allocation_size(buffer);
  })

  status = iree_hal_hip_device_add_asynchronous_cleanup(
      allocator->parent_device, &iree_hal_hip_buffer_release_callback_async,
      (void*)release_async_data);
  // NOTE(review): if enqueueing fails it is not visible from here whether the
  // callback is still invoked (and so whether |release_async_data| and the
  // buffer memory are released) - confirm against the cleanup thread
  // implementation before adding a fallback on this path.
  iree_status_ignore(status);
}

static iree_status_t iree_hal_hip_allocator_import_buffer(
Expand Down Expand Up @@ -766,6 +803,7 @@ iree_status_t iree_hal_hip_allocator_alloc_async(
iree_hal_buffer_allocation_size(buffer),
IREE_HOST_SIZE_MAX);
}
IREE_TRACE_ZONE_BEGIN(z0);

int device_ordinal = 0;
device_ordinal =
Expand Down Expand Up @@ -837,6 +875,8 @@ iree_status_t iree_hal_hip_allocator_alloc_async(
iree_hal_hip_buffer_set_allocation_empty(buffer);
}

IREE_TRACE_ZONE_END(z0);

return status;
}

Expand All @@ -849,6 +889,7 @@ iree_status_t iree_hal_hip_allocator_free_async(
return iree_ok_status();
}

IREE_TRACE_ZONE_BEGIN(z0);
IREE_TRACE_FREE_NAMED(IREE_HAL_HIP_ALLOCATOR_ID, (void*)device_ptr);
IREE_STATISTICS(iree_hal_allocator_statistics_record_free(
&allocator->statistics, iree_hal_buffer_memory_type(buffer),
Expand Down Expand Up @@ -888,6 +929,7 @@ iree_status_t iree_hal_hip_allocator_free_async(
if (iree_status_is_ok(status)) {
iree_hal_hip_buffer_set_allocation_empty(buffer);
}
IREE_TRACE_ZONE_END(z0);

return status;
}
Expand Down
8 changes: 8 additions & 0 deletions runtime/src/iree/hal/drivers/hip/hip_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -1116,6 +1116,14 @@ static iree_status_t iree_hal_hip_device_stream_add_cleanup(
return status;
}

// Queues |callback| with |user_data| on the cleanup thread owned by
// |base_device|. The request carries no event (NULL is passed), and the
// result of the enqueue is returned to the caller.
iree_status_t iree_hal_hip_device_add_asynchronous_cleanup(
    iree_hal_device_t* base_device, iree_hal_hip_cleanup_callback_t callback,
    void* user_data) {
  iree_hal_hip_device_t* hip_device = iree_hal_hip_device_cast(base_device);
  iree_status_t enqueue_status = iree_hal_hip_cleanup_thread_add_cleanup(
      hip_device->cleanup_thread, /*event=*/NULL, callback, user_data);
  return enqueue_status;
}

static iree_status_t
iree_hal_hip_device_stream_signal_semaphores_and_add_cleanup(
iree_hal_hip_device_t* device, hipStream_t stream,
Expand Down
5 changes: 5 additions & 0 deletions runtime/src/iree/hal/drivers/hip/hip_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "iree/base/api.h"
#include "iree/hal/api.h"
#include "iree/hal/drivers/hip/api.h"
#include "iree/hal/drivers/hip/cleanup_thread.h"
#include "iree/hal/drivers/hip/dynamic_symbols.h"
#include "iree/hal/drivers/hip/rccl_dynamic_symbols.h"

Expand Down Expand Up @@ -49,4 +50,8 @@ static inline hipDeviceptr_t iree_hal_hip_device_size_to_hip_device_prt(
return (hipDeviceptr_t)p;
}

// Enqueues |callback| with |user_data| on the cleanup thread of |base_device|.
//
// The request is submitted with a NULL event, so the cleanup thread skips its
// hipEventSynchronize wait for this entry.
//
// Returns the status produced by enqueueing the request on the cleanup thread.
iree_status_t iree_hal_hip_device_add_asynchronous_cleanup(
iree_hal_device_t* base_device, iree_hal_hip_cleanup_callback_t callback,
void* user_data);

#endif // IREE_HAL_DRIVERS_HIP_DEVICE_H_

0 comments on commit 6c07258

Please sign in to comment.