Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[libomptarget][OpenMP] Initial implementation of omp_target_memset() and omp_target_memset_async() #68706

Merged
merged 10 commits into from
Oct 19, 2023
1 change: 1 addition & 0 deletions openmp/libomptarget/include/omptarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,7 @@ int omp_target_memcpy_rect(void *Dst, const void *Src, size_t ElementSize,
const size_t *DstDimensions,
const size_t *SrcDimensions, int DstDevice,
int SrcDevice);
void *omp_target_memset(void *Ptr, int C, size_t N, int DeviceNum);
mjklemm marked this conversation as resolved.
Show resolved Hide resolved
int omp_target_associate_ptr(const void *HostPtr, const void *DevicePtr,
size_t Size, size_t DeviceOffset, int DeviceNum);
int omp_target_disassociate_ptr(const void *HostPtr, int DeviceNum);
Expand Down
130 changes: 121 additions & 9 deletions openmp/libomptarget/src/api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -241,10 +241,125 @@ static int libomp_target_memcpy_async_helper(kmp_int32 Gtid, kmp_task_t *Task) {
return Rc;
}

/// Entry routine of the helper task created for omp_target_memset_async().
///
/// Reads the TargetMemsetArgsTy record stored in the task's `shareds` slot,
/// performs the fill through omp_target_memset() and deletes the record.
///
/// \param Gtid global thread id handed in by the tasking runtime (unused here).
/// \param Task the helper task; its `shareds` member carries the arguments.
/// \returns OFFLOAD_FAIL if \p Task or its `shareds` pointer is null,
///          OFFLOAD_SUCCESS otherwise.
static int libomp_target_memset_async_helper(kmp_int32 Gtid, kmp_task_t *Task) {
  if (Task == nullptr)
    return OFFLOAD_FAIL;

  auto *MemsetArgs = reinterpret_cast<TargetMemsetArgsTy *>(Task->shareds);
  if (MemsetArgs == nullptr)
    return OFFLOAD_FAIL;

  // Delegate to the synchronous routine; its return value is not used.
  omp_target_memset(MemsetArgs->Ptr, MemsetArgs->C, MemsetArgs->N,
                    MemsetArgs->DeviceNum);

  // The argument record is released here once the fill has been issued.
  delete MemsetArgs;
  return OFFLOAD_SUCCESS;
}

static inline void
ConvertDepObjVector(llvm::SmallVector<kmp_depend_info_t> &Vec, int DepObjCount,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't this function name be lower case?

omp_depend_t *DepObjList) {
for (int i = 0; i < DepObjCount; ++i) {
omp_depend_t DepObj = DepObjList[i];
Vec.push_back(*((kmp_depend_info_t *)DepObj));
}
}

/// Allocates a hidden-helper task whose entry routine is
/// libomp_target_memset_async_helper and submits it with the given
/// dependences.
///
/// Ownership of \p Args: it is deleted here when task allocation fails;
/// otherwise the pointer is stored in the task's `shareds` slot, from which
/// the helper routine reads and deletes it.
///
/// \param Args        heap-allocated argument record for the memset.
/// \param DepObjCount number of entries in \p DepObjList.
/// \param DepObjList  depend objects the task has to wait for.
/// \returns OFFLOAD_FAIL if the task could not be allocated, otherwise the
///          value returned by __kmpc_omp_task_with_deps.
static int libomp_helper_memset_task_creation(TargetMemsetArgsTy *Args,
                                              int DepObjCount,
                                              omp_depend_t *DepObjList) {
  // Global thread id of the calling thread.
  const int ThreadId = __kmpc_global_thread_num(nullptr);

  // Routine the helper task will execute.
  int (*TaskEntry)(kmp_int32, kmp_task_t *) = libomp_target_memset_async_helper;

  // Set the hidden_helper bit through the bit-field view of the flag word.
  kmp_int32 TaskFlags = 0;
  reinterpret_cast<kmp_tasking_flags_t *>(&TaskFlags)->hidden_helper = 1;

  kmp_task_t *HelperTask = __kmpc_omp_target_task_alloc(
      nullptr, ThreadId, TaskFlags, sizeof(kmp_task_t), 0, TaskEntry, -1);
  if (HelperTask == nullptr) {
    // No task will ever consume the arguments, so release them here.
    delete Args;
    return OFFLOAD_FAIL;
  }

  // Hand the arguments over to the helper task.
  HelperTask->shareds = Args;

  // Translate the depend objects into the representation the runtime expects.
  llvm::SmallVector<kmp_depend_info_t> Dependences;
  ConvertDepObjVector(Dependences, DepObjCount, DepObjList);

  // Submit the task together with its dependences.
  return __kmpc_omp_task_with_deps(nullptr, ThreadId, HelperTask, DepObjCount,
                                   Dependences.data(), 0, nullptr);
}

/// Fills the first \p N bytes of the memory at \p Ptr on device \p DeviceNum
/// with the byte value \p C.
///
/// The routine has no way to report an error: it always returns \p Ptr.
/// - If \p Ptr is null or \p N is zero it does nothing.
/// - If \p DeviceNum is the initial (host) device it calls memset() directly.
/// - Otherwise it stages a filled buffer on the host and copies it to the
///   device. If that staging buffer cannot be allocated the fill is skipped
///   (and a debug message is emitted) rather than dereferencing a null
///   pointer.
///
/// \returns \p Ptr, unchanged.
EXTERN void *omp_target_memset(void *Ptr, int C, size_t N, int DeviceNum) {
  TIMESCOPE();
  DP("Call to omp_target_memset, device %d, device pointer %p, size %zu\n",
     DeviceNum, Ptr, N);

  // Behave as a no-op if N==0 or if Ptr is nullptr (as a useful implementation
  // of unspecified behavior, see OpenMP spec).
  if (!Ptr || N == 0) {
    return Ptr;
  }

  const int InitialDevice = omp_get_initial_device();
  if (DeviceNum == InitialDevice) {
    DP("filling memory on host via memset\n");
    memset(Ptr, C, N); // ignore return value, memset() cannot fail
  } else {
    // TODO: replace the omp_target_memset() slow path with the fast path.
    // That will require the ability to execute a kernel from within
    // libomptarget.so (which we do not have at the moment).

    // This is a very slow path: create a filled array on the host and upload
    // it to the GPU device.
    void *Shadow = omp_target_alloc(N, InitialDevice);
    if (Shadow) {
      (void)memset(Shadow, C, N);
      (void)omp_target_memcpy(Ptr, Shadow, N, 0, 0, DeviceNum, InitialDevice);
      (void)omp_target_free(Shadow, InitialDevice);
    } else {
      // The staging allocation failed. There is no return code to signal
      // this, so skip the fill instead of crashing the process.
      DP("omp_target_memset failed to fill memory due to error with "
         "omp_target_alloc\n");
    }
  }

  DP("omp_target_memset returns %p\n", Ptr);
  return Ptr;
}

/// Asynchronous variant of omp_target_memset(): packages the arguments into a
/// heap-allocated TargetMemsetArgsTy and hands them to
/// libomp_helper_memset_task_creation(), which schedules the fill as a task
/// depending on the given depend objects.
///
/// \param Ptr         start of the memory to fill.
/// \param C           fill value, forwarded to omp_target_memset().
/// \param N           number of bytes to fill.
/// \param DeviceNum   device that \p Ptr belongs to.
/// \param DepObjCount number of entries in \p DepObjList.
/// \param DepObjList  depend objects the fill has to wait for.
/// \returns \p Ptr, unchanged; a failure to create the task is not reported.
EXTERN void *omp_target_memset_async(void *Ptr, int C, size_t N, int DeviceNum,
                                     int DepObjCount,
                                     omp_depend_t *DepObjList) {
  // The trailing newline keeps this message on its own line, like every other
  // DP() message in this file.
  DP("Call to omp_target_memset_async, device %d, device pointer %p, size "
     "%zu\n",
     DeviceNum, Ptr, N);

  // Behave as a no-op if N==0 or if Ptr is nullptr (as a useful implementation
  // of unspecified behavior, see OpenMP spec).
  if (!Ptr || N == 0) {
    return Ptr;
  }

  // Create the task object to deal with the async invocation. Ownership of
  // Args passes to the helper below.
  auto *Args = new TargetMemsetArgsTy{Ptr, C, N, DeviceNum};

  // omp_target_memset_async() cannot fail via a return code, so ignore the
  // return code of the helper function
  (void)libomp_helper_memset_task_creation(Args, DepObjCount, DepObjList);

  return Ptr;
}

// Allocate and launch helper task
static int libomp_helper_task_creation(TargetMemcpyArgsTy *Args,
int DepObjCount,
omp_depend_t *DepObjList) {
static int libomp_helper_memcpy_task_creation(TargetMemcpyArgsTy *Args,
mjklemm marked this conversation as resolved.
Show resolved Hide resolved
int DepObjCount,
omp_depend_t *DepObjList) {
// Create global thread ID
int Gtid = __kmpc_global_thread_num(nullptr);
int (*Fn)(kmp_int32, kmp_task_t *) = &libomp_target_memcpy_async_helper;
Expand All @@ -270,10 +385,7 @@ static int libomp_helper_task_creation(TargetMemcpyArgsTy *Args,

// Convert the type of depend objects
llvm::SmallVector<kmp_depend_info_t> DepObjs;
for (int i = 0; i < DepObjCount; i++) {
omp_depend_t DepObj = DepObjList[i];
DepObjs.push_back(*((kmp_depend_info_t *)DepObj));
}
ConvertDepObjVector(DepObjs, DepObjCount, DepObjList);

// Launch the helper task
int Rc = __kmpc_omp_task_with_deps(nullptr, Gtid, Ptr, DepObjCount,
Expand Down Expand Up @@ -302,7 +414,7 @@ EXTERN int omp_target_memcpy_async(void *Dst, const void *Src, size_t Length,
Dst, Src, Length, DstOffset, SrcOffset, DstDevice, SrcDevice);

// Create and launch helper task
int Rc = libomp_helper_task_creation(Args, DepObjCount, DepObjList);
int Rc = libomp_helper_memcpy_task_creation(Args, DepObjCount, DepObjList);

DP("omp_target_memcpy_async returns %d\n", Rc);
return Rc;
Expand Down Expand Up @@ -399,7 +511,7 @@ EXTERN int omp_target_memcpy_rect_async(
DstDimensions, SrcDimensions, DstDevice, SrcDevice);

// Create and launch helper task
int Rc = libomp_helper_task_creation(Args, DepObjCount, DepObjList);
int Rc = libomp_helper_memcpy_task_creation(Args, DepObjCount, DepObjList);

DP("omp_target_memcpy_rect_async returns %d\n", Rc);
return Rc;
Expand Down
2 changes: 2 additions & 0 deletions openmp/libomptarget/src/exports
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ VERS1.0 {
omp_target_memcpy_rect;
omp_target_memcpy_async;
omp_target_memcpy_rect_async;
omp_target_memset;
omp_target_memset_async;
omp_target_associate_ptr;
omp_target_disassociate_ptr;
llvm_omp_target_alloc_host;
Expand Down
13 changes: 13 additions & 0 deletions openmp/libomptarget/src/private.h
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,19 @@ struct TargetMemcpyArgsTy {
DstOffsets(DstOffsets), SrcOffsets(SrcOffsets),
DstDimensions(DstDimensions), SrcDimensions(SrcDimensions){};
};

struct TargetMemsetArgsTy {
  /**
   * Common attributes of a memset operation.
   *
   * The members mirror, in order, the parameters of omp_target_memset();
   * the async helper task forwards them to that routine unchanged.
   */
  void *Ptr;     // start of the memory region to fill
  int C;         // fill value, passed on as the `C` argument
  size_t N;      // number of bytes to fill
  int DeviceNum; // device number the pointer refers to

  // no constructors defined, because this is a PoD
};

// Invalid GTID as defined by libomp; keep in sync
#define KMP_GTID_DNE (-2)
#ifdef __cplusplus
Expand Down
45 changes: 45 additions & 0 deletions openmp/libomptarget/test/api/omp_target_memset.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// RUN: %libomptarget-compile-and-run-generic

#include "stdio.h"
#include <omp.h>
#include <stdlib.h>

/*
 * Checks omp_target_memset(): allocates 130 ints on the chosen device, zeroes
 * them with omp_target_memset(), copies 128 of them back into a host array
 * that was pre-filled with non-zero data, and verifies every element is zero.
 * Any mismatch aborts; if the device allocation fails the test exits with 0
 * without checking anything.
 */
int main() {
  int d = omp_get_default_device();
  int id = omp_get_initial_device();
  int i;
  void *p;
  void *result;

  /* Fall back to the initial device if the default device is out of range. */
  if (d < 0 || d >= omp_get_num_devices())
    d = id;

  p = omp_target_alloc(130 * sizeof(int), d);
  if (p == NULL)
    return 0;

  /* The routine must hand back the pointer it was given. */
  result = omp_target_memset(p, 0, 130 * sizeof(int), d);
  if (result != p) {
    abort();
  }

  /* Pre-fill the destination so that a copy that never happened is caught. */
  int q2[128];
  for (i = 0; i < 128; ++i)
    q2[i] = i;

  /* Copy 128 ints back, starting one int into the device buffer. */
  if (omp_target_memcpy_async(q2, p, 128 * sizeof(int), 0, sizeof(int), id, d,
                              0, NULL))
    abort();

#pragma omp taskwait

  for (i = 0; i < 128; ++i)
    if (q2[i] != 0)
      abort();

  omp_target_free(p, d);

  return 0;
}
2 changes: 2 additions & 0 deletions openmp/runtime/src/dllexports
Original file line number Diff line number Diff line change
Expand Up @@ -518,6 +518,8 @@ kmp_set_warnings_off 780
omp_target_memcpy_rect 887
omp_target_associate_ptr 888
omp_target_disassociate_ptr 889
omp_target_memset 3000
omp_target_memset_async 3001
%endif

kmp_set_disp_num_buffers 890
Expand Down
5 changes: 5 additions & 0 deletions openmp/runtime/src/include/omp.h.var
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,11 @@
extern int __KAI_KMPC_CONVENTION omp_target_memcpy_rect_async(void *, const void *, size_t, int, const size_t *,
const size_t *, const size_t *, const size_t *, const size_t *, int, int,
int, omp_depend_t *);

/* OpenMP 6.0 device memory routines */
extern void * __KAI_KMPC_CONVENTION omp_target_memset(void *, int, size_t, int);
extern void * __KAI_KMPC_CONVENTION omp_target_memset_async(void *, int, size_t, int, int, omp_depend_t *);

/*!
* The `omp_get_mapped_ptr` routine returns the device pointer that is associated with a host pointer for a given device.
*/
Expand Down
22 changes: 22 additions & 0 deletions openmp/runtime/src/include/omp_lib.f90.var
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,28 @@
integer (omp_depend_kind), optional :: depobj_list(*)
end function omp_target_memcpy_rect_async

! Interface to the C routine omp_target_memset (bind(c)): all arguments are
! passed by value and the result is a C pointer.
function omp_target_memset(ptr, val, count, device_num) bind(c)
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_size_t
  type(c_ptr) :: omp_target_memset
  type(c_ptr), value :: ptr
  integer(c_int), value :: val
  integer(c_size_t), value :: count
  integer(c_int), value :: device_num
end function omp_target_memset

! Interface to the C routine omp_target_memset_async (bind(c)): the scalar
! arguments are passed by value; depobj_list is an optional assumed-size
! array of depend objects and the result is a C pointer.
function omp_target_memset_async(ptr, val, count, device_num, &
    depobj_count, depobj_list) bind(c)
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_size_t
  use omp_lib_kinds
  type(c_ptr) :: omp_target_memset_async
  type(c_ptr), value :: ptr
  integer(c_int), value :: val
  integer(c_size_t), value :: count
  integer(c_int), value :: device_num
  integer(c_int), value :: depobj_count
  integer(omp_depend_kind), optional :: depobj_list(*)
end function omp_target_memset_async

function omp_target_associate_ptr(host_ptr, device_ptr, size, &
device_offset, device_num) bind(c)
use omp_lib_kinds
Expand Down
22 changes: 22 additions & 0 deletions openmp/runtime/src/include/omp_lib.h.var
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,28 @@
integer(omp_depend_kind), optional :: depobj_list(*)
end function omp_target_memcpy_rect_async

! Interface to the C routine omp_target_memset (bind(c)): all arguments are
! passed by value and the result is a C pointer.
function omp_target_memset(ptr, val, count, device_num) bind(c)
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_size_t
  type(c_ptr) :: omp_target_memset
  type(c_ptr), value :: ptr
  integer(c_int), value :: val
  integer(c_size_t), value :: count
  integer(c_int), value :: device_num
end function omp_target_memset

! Interface to the C routine omp_target_memset_async (bind(c)): the scalar
! arguments are passed by value; depobj_list is an optional assumed-size
! array of depend objects and the result is a C pointer.
! NOTE(review): the continuation line carries a leading '&' like the
! neighbouring interfaces in this include file; confirm the column layout
! against the file's fixed-form conventions.
function omp_target_memset_async(ptr, val, count, device_num, &
     &    depobj_count, depobj_list) bind(c)
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_size_t
  use omp_lib_kinds
  type(c_ptr) :: omp_target_memset_async
  type(c_ptr), value :: ptr
  integer(c_int), value :: val
  integer(c_size_t), value :: count
  integer(c_int), value :: device_num
  integer(c_int), value :: depobj_count
  integer(omp_depend_kind), optional :: depobj_list(*)
end function omp_target_memset_async

function omp_target_associate_ptr(host_ptr, device_ptr, size, &
& device_offset, device_num) bind(c)
use, intrinsic :: iso_c_binding, only : c_ptr, c_size_t, c_int
Expand Down
2 changes: 2 additions & 0 deletions openmp/runtime/src/kmp_ftn_os.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@
#define FTN_TARGET_IS_PRESENT omp_target_is_present
#define FTN_TARGET_MEMCPY omp_target_memcpy
#define FTN_TARGET_MEMCPY_RECT omp_target_memcpy_rect
#define FTN_TARGET_MEMSET omp_target_memset
#define FTN_TARGET_MEMSET_ASYNC omp_target_memset_async
#define FTN_TARGET_ASSOCIATE_PTR omp_target_associate_ptr
#define FTN_TARGET_DISASSOCIATE_PTR omp_target_disassociate_ptr
#endif
Expand Down