Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ocl: prepared OpenCL backend for CP2K's Offload/DBM/DBT libraries #742

Merged
merged 1 commit into from
Dec 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions src/acc/acc_bench_smm.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@
#if !defined(NREPEAT)
# define NREPEAT 3
#endif
#if !defined(XREPEAT)
# define XREPEAT 66
#endif
#if !defined(TRANSPOSE)
# define TRANSPOSE
#endif
Expand Down Expand Up @@ -292,7 +295,8 @@ int main(int argc, char* argv[]) {
double duration = 0;
#endif
const char* const env_stack_size = getenv("SMM_BATCHSIZE");
int nrepeat = (0 < inr ? inr : NREPEAT);
const int xrepeat = (0 != check ? NREPEAT : XREPEAT);
int nrepeat = (0 < inr ? inr : xrepeat);
int stack_size, na, nb, nc, nr, r;
if (NULL == env_stack_size) {
stack_size = 0;
Expand Down Expand Up @@ -325,7 +329,7 @@ int main(int argc, char* argv[]) {
const int r = rnd[nok % NRAND], ss = -stack_size, bs = (1 < ss ? ss : BATCHSIZE);
const int limit = (BATCHGRAIN < ss ? ((bs + BATCHGRAIN - 1) / BATCHGRAIN) : ss);
stack_size = (r % limit + 1) * BATCHGRAIN;
nrepeat = MAX((BATCHSIZE * nrepeat + stack_size - 1) / stack_size, NREPEAT);
nrepeat = MAX((BATCHSIZE * nrepeat + stack_size - 1) / stack_size, xrepeat);
}
else stack_size = BATCHSIZE; /* plain default */
}
Expand Down
4 changes: 2 additions & 2 deletions src/acc/opencl/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ endif
endif

CFLAGS := -fPIC \
-Wall -Wextra \
-Wall -Wextra -Wcast-qual \
-Wno-overlength-strings \
-Wno-variadic-macros \
-Wno-unused-function \
Expand Down Expand Up @@ -299,7 +299,7 @@ endif
fi

$(MAKDIR)/smm/opencl_kernels.h: $(MAKDIR)/acc_opencl.sh $(KERNEL) $(PARAMS)
@CPPFLAGS=$(CPP_OPENCL_FLAGS) $(MAKDIR)/acc_opencl.sh $(KERNEL) $(PARAMS) $@
CPPFLAGS=$(CPP_OPENCL_FLAGS) $(MAKDIR)/acc_opencl.sh $(KERNEL) $(PARAMS) $@

.PHONY: backend
backend: $(ACCDIR)/dbcsr_acc.a
Expand Down
69 changes: 50 additions & 19 deletions src/acc/opencl/acc_opencl.c
Original file line number Diff line number Diff line change
Expand Up @@ -531,26 +531,45 @@ int c_dbcsr_acc_init(void) {
else {
result = EXIT_FAILURE;
}
c_dbcsr_acc_opencl_config.handle = 0;
c_dbcsr_acc_opencl_config.handles = NULL;
c_dbcsr_acc_opencl_config.nclmems = c_dbcsr_acc_opencl_config.nevents = 0;
c_dbcsr_acc_opencl_config.clmems = c_dbcsr_acc_opencl_config.events = NULL;
c_dbcsr_acc_opencl_config.storage = NULL;
# if LIBXSMM_VERSION4(1, 17, 0, 0) < LIBXSMM_VERSION_NUMBER && defined(ACC_OPENCL_HANDLES_MAXCOUNT) && \
(0 < ACC_OPENCL_HANDLES_MAXCOUNT)
if (EXIT_SUCCESS == result) {
c_dbcsr_acc_opencl_config.handle = ACC_OPENCL_HANDLES_MAXCOUNT * c_dbcsr_acc_opencl_config.nthreads;
c_dbcsr_acc_opencl_config.handles = (void**)malloc(sizeof(void*) * c_dbcsr_acc_opencl_config.handle);
c_dbcsr_acc_opencl_config.storage = malloc(sizeof(void*) * c_dbcsr_acc_opencl_config.handle);
if (NULL != c_dbcsr_acc_opencl_config.handles && NULL != c_dbcsr_acc_opencl_config.storage) {
libxsmm_pmalloc_init(sizeof(void*), &c_dbcsr_acc_opencl_config.handle, c_dbcsr_acc_opencl_config.handles,
const size_t nhandles = ACC_OPENCL_HANDLES_MAXCOUNT * c_dbcsr_acc_opencl_config.nthreads;
# if defined(ACC_OPENCL_MEM_OFFSET)
c_dbcsr_acc_opencl_config.nclmems = nhandles;
c_dbcsr_acc_opencl_config.clmems = (void**)malloc(sizeof(void*) * nhandles);
c_dbcsr_acc_opencl_config.storage = malloc(sizeof(void*) * (nhandles + nhandles));
if (NULL != c_dbcsr_acc_opencl_config.clmems && NULL != c_dbcsr_acc_opencl_config.storage) {
libxsmm_pmalloc_init(sizeof(void*), &c_dbcsr_acc_opencl_config.nclmems, c_dbcsr_acc_opencl_config.clmems,
(void**)c_dbcsr_acc_opencl_config.storage + nhandles);
}
else {
free(c_dbcsr_acc_opencl_config.clmems);
c_dbcsr_acc_opencl_config.clmems = NULL;
c_dbcsr_acc_opencl_config.nclmems = 0;
result = EXIT_FAILURE;
}
# else
c_dbcsr_acc_opencl_config.storage = malloc(sizeof(void*) * nhandles);
# endif
c_dbcsr_acc_opencl_config.nevents = nhandles;
c_dbcsr_acc_opencl_config.events = (void**)malloc(sizeof(void*) * nhandles);
if (NULL != c_dbcsr_acc_opencl_config.events && NULL != c_dbcsr_acc_opencl_config.storage) {
libxsmm_pmalloc_init(sizeof(void*), &c_dbcsr_acc_opencl_config.nevents, c_dbcsr_acc_opencl_config.events,
c_dbcsr_acc_opencl_config.storage);
}
else {
free(c_dbcsr_acc_opencl_config.handles);
free(c_dbcsr_acc_opencl_config.events);
c_dbcsr_acc_opencl_config.events = NULL;
c_dbcsr_acc_opencl_config.nevents = 0;
result = EXIT_FAILURE;
}
if (EXIT_SUCCESS != result) {
free(c_dbcsr_acc_opencl_config.storage);
c_dbcsr_acc_opencl_config.handles = NULL;
c_dbcsr_acc_opencl_config.storage = NULL;
c_dbcsr_acc_opencl_config.handle = 0;
result = EXIT_FAILURE;
}
}
# endif
Expand Down Expand Up @@ -663,7 +682,7 @@ int c_dbcsr_acc_finalize(void) {
}
}
/* release/reset buffers */
free(c_dbcsr_acc_opencl_config.handles);
free(c_dbcsr_acc_opencl_config.events);
free(c_dbcsr_acc_opencl_config.storage);
free(c_dbcsr_acc_opencl_config.streams);
/* clear configuration */
Expand Down Expand Up @@ -1253,7 +1272,7 @@ int c_dbcsr_acc_opencl_kernel(int source_is_file, const char source[], const cha
else break;
}
# if !defined(NDEBUG)
if (EXIT_SUCCESS == c_dbcsr_acc_opencl_device_ext(active_id, (const char**)&ext, 1))
if (EXIT_SUCCESS == c_dbcsr_acc_opencl_device_ext(active_id, (const char* const*)&ext, 1))
# endif
{ /* NDEBUG: assume given extension is supported (confirmed upfront) */
if (NULL == line) { /* extension is not already part of source */
Expand Down Expand Up @@ -1318,7 +1337,11 @@ int c_dbcsr_acc_opencl_kernel(int source_is_file, const char source[], const cha
: NULL);
if (NULL != src) {
if ((size_t)size == fread(src, 1 /*sizeof(char)*/, size /*count*/, file)) {
if (source != ext_source) libxsmm_free((void*)ext_source);
if (source != ext_source) {
void* p = NULL;
LIBXSMM_ASSIGN127(&p, &ext_source);
libxsmm_free(p);
}
src[size] = '\0';
ext_source = src;
}
Expand Down Expand Up @@ -1357,7 +1380,11 @@ int c_dbcsr_acc_opencl_kernel(int source_is_file, const char source[], const cha
}
ok = EXIT_FAILURE;
}
if (source != ext_source) libxsmm_free((void*)ext_source);
if (source != ext_source) {
void* p = NULL;
LIBXSMM_ASSIGN127(&p, &ext_source);
libxsmm_free(p);
}
buffer[0] = '\0'; /* reset to empty */
if (CL_SUCCESS == result) {
*kernel = clCreateKernel(program, kernel_name, &result);
Expand Down Expand Up @@ -1414,7 +1441,9 @@ int c_dbcsr_acc_opencl_kernel(int source_is_file, const char source[], const cha
}
}
else if (source != ext_source) { /* error: creating program */
libxsmm_free((void*)ext_source);
void* p = NULL;
LIBXSMM_ASSIGN127(&p, &ext_source);
libxsmm_free(p);
}
}
else if (EXIT_SUCCESS == result) { /* binary representation */
Expand All @@ -1424,7 +1453,7 @@ int c_dbcsr_acc_opencl_kernel(int source_is_file, const char source[], const cha
# endif
{
program = clCreateProgramWithBinary(
context, 1, &active_id, &size_src, (const unsigned char**)(const void*)&source, NULL /*binary_status*/, &result);
context, 1, &active_id, &size_src, (const unsigned char**)&source, NULL /*binary_status*/, &result);
}
if (CL_SUCCESS == result) {
assert(NULL != program);
Expand All @@ -1443,7 +1472,7 @@ int c_dbcsr_acc_opencl_kernel(int source_is_file, const char source[], const cha
# endif
{
program = clCreateProgramWithBinary(
context, 1, &active_id, &size_src, (const unsigned char**)(const void*)&source, NULL /*binary_status*/, &result);
context, 1, &active_id, &size_src, (const unsigned char**)&source, NULL /*binary_status*/, &result);
}
assert(CL_SUCCESS != result || NULL != program);
if (CL_SUCCESS == result) {
Expand Down Expand Up @@ -1479,8 +1508,10 @@ int c_dbcsr_acc_opencl_kernel(int source_is_file, const char source[], const cha
}
}
if (NULL != file_src) {
void* p = NULL;
LIBXSMM_ASSIGN127(&p, (const void*)&source);
assert(0 != source_is_file);
libxsmm_free((void*)source);
libxsmm_free(p);
}
# if !defined(NDEBUG)
if (EXIT_SUCCESS != result && NULL != kernel) *kernel = NULL;
Expand Down
24 changes: 15 additions & 9 deletions src/acc/opencl/acc_opencl.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,18 +112,21 @@
# define ACC_OPENCL_STREAM_PRIORITIES
# endif
#endif
/** Stream-argument (ACC-interface) can be NULL (synchronous) */
#if !defined(ACC_OPENCL_STREAM_NULL) && 1
# define ACC_OPENCL_STREAM_NULL
#endif

/** Automatically determine cl_mem offset */
#if !defined(ACC_OPENCL_MEM_OFFSET) && 1
# define ACC_OPENCL_MEM_OFFSET
#endif

/** Use DBCSR's profile for detailed timings */
#if !defined(ACC_OPENCL_PROFILE) && 0
# define ACC_OPENCL_PROFILE
#endif

/* can depend on OpenCL implementation (unlikely) */
#if !defined(ACC_OPENCL_MEM_NOALLOC) && 1
# define ACC_OPENCL_MEM_NOALLOC
# define ACC_OPENCL_MEM(A) ((cl_mem*)&(A))
#else
# define ACC_OPENCL_MEM(A) ((cl_mem*)(A))
#endif
/* attaching c_dbcsr_acc_opencl_info_stream_t is needed */
#define ACC_OPENCL_STREAM(A) ((cl_command_queue*)(A))
/* incompatible with c_dbcsr_acc_event_record */
Expand Down Expand Up @@ -240,9 +243,9 @@ typedef struct c_dbcsr_acc_opencl_config_t {
/** Table of devices (thread-specific). */
c_dbcsr_acc_opencl_device_t* device;
/** Handle-counters (cl_mem handles and event handles). */
size_t handle;
size_t nclmems, nevents;
/** All handles and related storage. */
void **handles, *storage;
void **clmems, **events, *storage;
/** All created streams partitioned by thread-ID (thread-local slots). */
void** streams;
/** Counts number of streams created (thread-local). */
Expand Down Expand Up @@ -293,8 +296,11 @@ typedef struct c_dbcsr_acc_opencl_info_stream_t {
c_dbcsr_acc_opencl_info_stream_t* c_dbcsr_acc_opencl_info_stream(void* stream);
const int* c_dbcsr_acc_opencl_stream_priority(const void* stream);

void* c_dbcsr_acc_opencl_stream_default(void);

/** Get host-pointer associated with device-memory (c_dbcsr_acc_dev_mem_allocate). */
void* c_dbcsr_acc_opencl_get_hostptr(cl_mem memory);
int c_dbcsr_acc_opencl_memset(void* dev_mem, int value, size_t offset, size_t nbytes, void* stream);
/** Amount of device memory; local memory is only non-zero if separate from global. */
int c_dbcsr_acc_opencl_info_devmem(cl_device_id device, size_t* mem_free, size_t* mem_total, size_t* mem_local, int* mem_unified);
/** Get device associated with thread-ID. */
Expand Down
25 changes: 10 additions & 15 deletions src/acc/opencl/acc_opencl.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ then
-h|--help)
shift $#;;
-p|--params)
PARAMPATH=yes
PARAMS=$2
PARAMS="$2\t"
shift 2;;
-c|-d|--debug|--comments)
CPPFLAGS+=" -C"
Expand All @@ -50,13 +49,9 @@ then
esac
done
HERE="$(cd "$(dirname "$0")" && pwd -P)"
PARAMDIR=$(if [ ! "${PARAMDIR}" ]; then echo "${HERE}/smm/params"; fi)
if [ "${PARAMPATH}" ]; then
PARAMPATH=${PARAMS}
else
HERE="$(cd "$(dirname "$0")" && pwd -P)"
PARAMPATH=${PARAMDIR}
fi
PARAMDIR=${PARAMDIR:-${PARAMS}}
PARAMDIR=${PARAMDIR:-${HERE}/smm/params}
PARAMDIR=$(echo -e "${PARAMDIR}" | ${TR} -d '\t')
if [ "$#" -gt 1 ]; then
# allow for instance /dev/stdout
if [ "${OFILE##*.}" = "h" ]; then
Expand Down Expand Up @@ -127,7 +122,7 @@ then
NFILES_CSV=0
for CSVFILE in "${CSVFILES[@]}"; do
if [ "${CSVFILE##*.}" = "csv" ]; then
if [ -e "${CSVFILE}" ]; then
if [ -f "${CSVFILE}" ]; then
NFILES_CSV=$((NFILES_CSV+1))
fi
else
Expand All @@ -136,15 +131,15 @@ then
exit 1
fi
done
if [ "0" = "${NFILES_CSV}" ] && [ "${PARAMPATH}" ]; then
CSVFILES=("${PARAMPATH}"/*.csv)
if [ "0" = "${NFILES_CSV}" ] && [ "${PARAMDIR}" ] && [ -d "${PARAMDIR}" ]; then
CSVFILES=("${PARAMDIR}"/*.csv)
NFILES_CSV=${#CSVFILES[@]}
fi
for CSVFILE in "${CSVFILES[@]}"; do
if [ ! "${DELIM}" ]; then
SEPAR=$(${SED} -n "1s/[^${DELIMS}]//gp" "${CSVFILE}")
SEPAR=$(${SED} -n "1s/[^${DELIMS}]//gp" "${CSVFILE}" 2>/dev/null)
DELIM=${SEPAR:0:1}
MATCH=$(${SED} -n "1s/[^${DELIM}]//gp" "${CSVFILE}")
MATCH=$(${SED} -n "1s/[^${DELIM}]//gp" "${CSVFILE}" 2>/dev/null)
fi
if [ "${DELIM}" ]; then
CHECK=$(${SED} "/^[[:space:]]*$/d;s/[^${DELIM}]//g" "${CSVFILE}" | ${SORT} -u | ${SED} -n "0,/./p")
Expand All @@ -156,7 +151,7 @@ then
else
ERRFILE=${CSVFILE}
fi
if [ "${ERRFILE}" ]; then
if [ "${ERRFILE}" ] && [ -f "${ERRFILE}" ]; then
>&2 echo "ERROR: ${ERRFILE} is malformed!"
if [ "${HFILE}" ]; then ${RM} -f "${OFILE}"; fi
exit 1
Expand Down
10 changes: 5 additions & 5 deletions src/acc/opencl/acc_opencl_event.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,12 @@ int c_dbcsr_acc_event_create(void** event_p) {
if (EXIT_SUCCESS == result)
# endif
{
assert(NULL == c_dbcsr_acc_opencl_config.handles || sizeof(void*) >= sizeof(cl_event));
assert(NULL == c_dbcsr_acc_opencl_config.events || sizeof(void*) >= sizeof(cl_event));
*event_p = (
# if LIBXSMM_VERSION4(1, 17, 0, 0) < LIBXSMM_VERSION_NUMBER && defined(ACC_OPENCL_HANDLES_MAXCOUNT) && \
(0 < ACC_OPENCL_HANDLES_MAXCOUNT)
NULL != c_dbcsr_acc_opencl_config.handles
? libxsmm_pmalloc(c_dbcsr_acc_opencl_config.handles, &c_dbcsr_acc_opencl_config.handle)
NULL != c_dbcsr_acc_opencl_config.events
? libxsmm_pmalloc(c_dbcsr_acc_opencl_config.events, &c_dbcsr_acc_opencl_config.nevents)
:
# endif
malloc(sizeof(cl_event)));
Expand Down Expand Up @@ -110,9 +110,9 @@ int c_dbcsr_acc_event_destroy(void* event) {
if (NULL != clevent) result = clReleaseEvent(clevent);
# if LIBXSMM_VERSION4(1, 17, 0, 0) < LIBXSMM_VERSION_NUMBER && defined(ACC_OPENCL_HANDLES_MAXCOUNT) && \
(0 < ACC_OPENCL_HANDLES_MAXCOUNT)
if (NULL != c_dbcsr_acc_opencl_config.handles) {
if (NULL != c_dbcsr_acc_opencl_config.events) {
/**(cl_event*)event = NULL; assert(NULL == *ACC_OPENCL_EVENT(event));*/
libxsmm_pfree(event, c_dbcsr_acc_opencl_config.handles, &c_dbcsr_acc_opencl_config.handle);
libxsmm_pfree(event, c_dbcsr_acc_opencl_config.events, &c_dbcsr_acc_opencl_config.nevents);
}
else
# endif
Expand Down
Loading
Loading