From bb5c3e2c702ee08cede79e46561c15ee23b6c308 Mon Sep 17 00:00:00 2001 From: Henri Vasserman Date: Sat, 13 May 2023 22:04:17 +0300 Subject: [PATCH 01/15] remove constants --- ggml-opencl.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/ggml-opencl.c b/ggml-opencl.c index 31ab13b25d1b8..a959f3aefd6a5 100644 --- a/ggml-opencl.c +++ b/ggml-opencl.c @@ -16,48 +16,48 @@ typedef uchar uint8_t; typedef int int32_t; typedef uint uint32_t; -constant uint QK4_0 = 32; +//constant uint QK4_0 = 32; struct block_q4_0 { float d; - uint8_t qs[QK4_0 / 2]; + uint8_t qs[16]; // QK4_0 / 2 }; -constant uint QK4_1 = 32; +//constant uint QK4_1 = 32; struct block_q4_1 { float d; float m; - uint8_t qs[QK4_1 / 2]; + uint8_t qs[16]; // QK4_1 / 2 }; -constant uint QK5_0 = 32; +//constant uint QK5_0 = 32; struct __attribute__ ((packed)) block_q5_0 { half d; uint32_t qh; - uint8_t qs[QK5_0 / 2]; + uint8_t qs[16]; // QK5_0 / 2 }; -constant uint QK5_1 = 32; +//constant uint QK5_1 = 32; struct block_q5_1 { half d; half m; uint32_t qh; - uint8_t qs[QK5_1 / 2]; + uint8_t qs[16]; // QK5_1 / 2 }; -constant uint QK8_0 = 32; +//constant uint QK8_0 = 32; struct block_q8_0 { float d; - uint8_t qs[QK8_0]; + uint8_t qs[16]; // QK8_0 / 2 }; __kernel void dequantize_row_q4_0(__global struct block_q4_0* x, __global float* y) { - constant uint qk = QK4_0; + constant uint qk = 32; // QK4_0; const uint i = get_global_id(0) / qk; const uint j = get_local_id(0); @@ -72,7 +72,7 @@ __kernel void dequantize_row_q4_0(__global struct block_q4_0* x, __global float* } __kernel void dequantize_row_q4_1(__global struct block_q4_1* x, __global float* y) { - constant uint qk = QK4_1; + constant uint qk = 32; // QK4_1; const uint i = get_global_id(0) / qk; const uint j = get_local_id(0); @@ -88,7 +88,7 @@ __kernel void dequantize_row_q4_1(__global struct block_q4_1* x, __global float* } __kernel void dequantize_row_q5_0(__global struct block_q5_0* x, __global float* y) { - constant uint qk = QK5_0; + constant uint qk = 32; // QK5_0; const uint i = get_global_id(0) / qk; const uint j = get_local_id(0); @@ -108,7 +108,7 @@ __kernel void dequantize_row_q5_0(__global struct block_q5_0* x, __global float* } __kernel void dequantize_row_q5_1(__global struct block_q5_1* x, __global float* y) { - constant uint qk = QK5_1; + constant uint qk = 32; // QK5_1; const uint i = get_global_id(0) / qk; const uint j = get_local_id(0); @@ -129,7 +129,7 @@ __kernel void dequantize_row_q5_1(__global struct block_q5_1* x, __global float* } __kernel void dequantize_row_q8_0(__global struct block_q8_0* x, __global float* y) { - constant uint qk = QK8_0; + constant uint qk = 32; // QK8_0; const uint i = get_global_id(0) / qk; const uint j = get_local_id(0); From b8fb5cdf5cc9241db737f0c613776727b78a32a6 Mon Sep 17 00:00:00 2001 From: Henri Vasserman Date: Sat, 13 May 2023 22:04:46 +0300 Subject: [PATCH 02/15] rewrite platform and device selection --- ggml-opencl.c | 143 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 116 insertions(+), 27 deletions(-) diff --git a/ggml-opencl.c b/ggml-opencl.c index a959f3aefd6a5..32c1fac959fd1 100644 --- a/ggml-opencl.c +++ b/ggml-opencl.c @@ -143,7 +143,7 @@ __kernel void dequantize_row_q8_0(__global struct block_q8_0* x, __global float* do { \ cl_int err_ = (err); \ if (err_ != CL_SUCCESS) { \ - fprintf(stderr, "OpenCL %s error %d at %s:%d\n", name, err_, __FILE__, __LINE__); \ + fprintf(stderr, "ggml_opencl: %s error %d at %s:%d\n", name, err_, __FILE__, __LINE__); \ exit(1); \ } \ } while (0) @@ -152,6 +152,7 @@ static cl_platform_id platform; static cl_device_id device; static cl_context context; static cl_command_queue queue; +static cl_bool out_of_order; static cl_program program; static cl_kernel kernel_q4_0, kernel_q4_1, kernel_q5_0, kernel_q5_1, kernel_q8_0; static cl_mem cl_buffer_a, cl_buffer_qb, cl_buffer_b, cl_buffer_c; @@ -188,35 +189,123 @@ static cl_program build_program_from_source(cl_context ctx, cl_device_id dev, co void ggml_cl_init(void) { cl_int err = 0; - char * GGML_CLBLAST_PLATFORM = getenv("GGML_CLBLAST_PLATFORM"); - char * GGML_CLBLAST_DEVICE = getenv("GGML_CLBLAST_DEVICE"); - int plat_num = (GGML_CLBLAST_PLATFORM == NULL ? 0 : atoi(GGML_CLBLAST_PLATFORM)); - int dev_num = (GGML_CLBLAST_DEVICE == NULL ? 0 : atoi(GGML_CLBLAST_DEVICE)); - printf("\nInitializing CLBlast (First Run)..."); - printf("\nAttempting to use: Platform=%d, Device=%d (If invalid, program will crash)\n",plat_num,dev_num); - cl_uint num_platforms; - clGetPlatformIDs(0, NULL, &num_platforms); - cl_platform_id* platforms = (cl_platform_id*)malloc(num_platforms*sizeof(cl_platform_id)); - clGetPlatformIDs(num_platforms, platforms, NULL); - platform = platforms[plat_num]; - char platform_buffer[1024]; - clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(platform_buffer), &platform_buffer, NULL); - cl_uint num_devices; - clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices); - cl_device_id* devices = (cl_device_id*)malloc(num_devices*sizeof(cl_device_id)); - clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, num_devices, devices, NULL); - device = devices[dev_num]; - char device_buffer[1024]; - clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_buffer), &device_buffer, NULL); - printf("Using Platform: %s Device: %s\n", platform_buffer, device_buffer); - context = clCreateContext(NULL, 1, &device, NULL, NULL, &err); - CL_CHECK(err, "clCreateContext"); + + enum { NPLAT = 16, NDEV = 16 }; + + char text_buffer[1024] = {0}; + + platform = NULL; + char * GGML_OPENCL_PLATFORM = getenv("GGML_OPENCL_PLATFORM"); + if (GGML_OPENCL_PLATFORM != NULL) { + cl_platform_id platforms[NPLAT]; + cl_uint num_platforms; + err = clGetPlatformIDs(NPLAT, platforms, &num_platforms); + CL_CHECK(err, "clGetPlatformIDs"); + + unsigned plat_num; + if (sscanf(GGML_OPENCL_PLATFORM, " %u", &plat_num) == 1) { + if (plat_num >= num_platforms) { + fprintf(stderr, "ggml_opencl: There is no platform %d\n", plat_num); + exit(1); + } else { + platform = platforms[plat_num]; + clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(text_buffer), &text_buffer, NULL); + } + } else { + for (unsigned i = 0; i < num_platforms; i++) { + clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, sizeof(text_buffer), &text_buffer, NULL); + if (strstr(text_buffer, GGML_OPENCL_PLATFORM) != NULL) { + platform = platforms[i]; + break; + } + } + } + if (platform == NULL) { + fprintf(stderr, "ggml_opencl: no platform matching '%s' was found.\n", GGML_OPENCL_PLATFORM); + exit(1); + } else { + fprintf(stderr, "ggml_opencl: selecting platform: '%s'\n", text_buffer); + } + } + + text_buffer[0] = 0; + device = NULL; + char * GGML_OPENCL_DEVICE = getenv("GGML_OPENCL_DEVICE"); + if (GGML_OPENCL_DEVICE != NULL) { + cl_device_id devices[16]; + cl_uint num_devices; + clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, NDEV, devices, &num_devices); + + unsigned dev_num; + if (sscanf(GGML_OPENCL_DEVICE, " %u", &dev_num) == 1) { + if (dev_num >= num_devices) { + fprintf(stderr, "ggml_opencl: There is no device %d\n", dev_num); + exit(1); + } else { + device = devices[dev_num]; + clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(text_buffer), &text_buffer, NULL); + } + } else { + for (unsigned i = 0; i < num_devices; i++) { + clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(text_buffer), &text_buffer, NULL); + if (strstr(text_buffer, GGML_OPENCL_DEVICE) != NULL) { + device = devices[i]; + break; + } + } + } + if (device == NULL) { + fprintf(stderr, "ggml_opencl: no device matching '%s' was found.\n", GGML_OPENCL_DEVICE); + exit(1); + } else { + fprintf(stderr, "ggml_opencl: selecting device: '%s'\n", text_buffer); + } + } + + cl_context_properties *properties = platform == NULL ? NULL : (cl_context_properties[]){ + (intptr_t)CL_CONTEXT_PLATFORM, (intptr_t)platform, 0 + }; + + if (device != NULL) { + context = clCreateContext(properties, 1, &device, NULL, NULL, &err); + CL_CHECK(err, "clCreateContext"); + } else { + context = clCreateContextFromType(properties, CL_DEVICE_TYPE_GPU, NULL, NULL, &err); + if (err == CL_DEVICE_NOT_AVAILABLE || err == CL_DEVICE_NOT_FOUND) { + context = clCreateContextFromType(properties, CL_DEVICE_TYPE_DEFAULT, NULL, NULL, &err); + if (err == CL_DEVICE_NOT_AVAILABLE || err == CL_DEVICE_NOT_FOUND) { + context = clCreateContextFromType(properties, CL_DEVICE_TYPE_ALL, NULL, NULL, &err); + } + } + CL_CHECK(err, "clCreateContextFromType"); + } + + if (device == NULL) { + err = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(&device), &device, NULL); + CL_CHECK(err, "clGetContextInfo"); + if (platform == NULL) { + err = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(&platform), &platform, NULL); + CL_CHECK(err, "clGetDeviceInfo"); + } + } + + if (platform != NULL) { + clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(text_buffer), &text_buffer, NULL); + fprintf(stderr, "ggml_opencl: using platform: '%s'\n", text_buffer); + } + if (device != NULL) { + clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(text_buffer), &text_buffer, NULL); + fprintf(stderr, "ggml_opencl: using device: '%s'\n", text_buffer); + } + + out_of_order = CL_TRUE; queue = clCreateCommandQueue(context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err); + if (err == CL_INVALID_PROPERTY) { + out_of_order = CL_FALSE; + queue = clCreateCommandQueue(context, device, 0, &err); + } CL_CHECK(err, "clCreateCommandQueue"); - free(platforms); - free(devices); - program = build_program_from_source(context, device, clblast_dequant); // Prepare dequantize kernels From 0453ce3f8b955c91a931c0c2a79f7403b0584b7f Mon Sep 17 00:00:00 2001 From: Henri Vasserman Date: Sun, 14 May 2023 12:47:41 +0300 Subject: [PATCH 03/15] Remove all constants --- ggml-opencl.c | 40 +++++++++++++--------------------------- 1 file changed, 13 insertions(+), 27 deletions(-) diff --git a/ggml-opencl.c b/ggml-opencl.c index 32c1fac959fd1..a9ab4620fb490 100644 --- a/ggml-opencl.c +++ b/ggml-opencl.c @@ -10,56 +10,49 @@ #include "ggml.h" #define MULTILINE_QUOTE(...) #__VA_ARGS__ -const char * clblast_dequant = MULTILINE_QUOTE( +static const char * program_source = MULTILINE_QUOTE( typedef uchar uint8_t; typedef int int32_t; typedef uint uint32_t; -//constant uint QK4_0 = 32; struct block_q4_0 { float d; - uint8_t qs[16]; // QK4_0 / 2 + uint8_t qs[16]; /* QK4_0 / 2 */ }; -//constant uint QK4_1 = 32; struct block_q4_1 { float d; float m; - uint8_t qs[16]; // QK4_1 / 2 + uint8_t qs[16]; /* QK4_1 / 2 */ }; -//constant uint QK5_0 = 32; struct __attribute__ ((packed)) block_q5_0 { half d; uint32_t qh; - uint8_t qs[16]; // QK5_0 / 2 + uint8_t qs[16]; /* QK5_0 / 2 */ }; -//constant uint QK5_1 = 32; struct block_q5_1 { half d; half m; uint32_t qh; - uint8_t qs[16]; // QK5_1 / 2 + uint8_t qs[16]; /* QK5_1 / 2 */ }; -//constant uint QK8_0 = 32; struct block_q8_0 { float d; - uint8_t qs[16]; // QK8_0 / 2 + uint8_t qs[16]; /* QK8_0 / 2 */ }; __kernel void dequantize_row_q4_0(__global struct block_q4_0* x, __global float* y) { - constant uint qk = 32; // QK4_0; - - const uint i = get_global_id(0) / qk; + const uint i = get_global_id(0) / 32; /* QK4_0 */ const uint j = get_local_id(0); const float d = x[i].d; @@ -72,9 +65,7 @@ __kernel void dequantize_row_q4_0(__global struct block_q4_0* x, __global float* } __kernel void dequantize_row_q4_1(__global struct block_q4_1* x, __global float* y) { - constant uint qk = 32; // QK4_1; - - const uint i = get_global_id(0) / qk; + const uint i = get_global_id(0) / 32; /* QK4_1 */ const uint j = get_local_id(0); const float d = x[i].d; @@ -88,9 +79,7 @@ __kernel void dequantize_row_q4_1(__global struct block_q4_1* x, __global float* } __kernel void dequantize_row_q5_0(__global struct block_q5_0* x, __global float* y) { - constant uint qk = 32; // QK5_0; - - const uint i = get_global_id(0) / qk; + const uint i = get_global_id(0) / 32; /* QK5_0 */ const uint j = get_local_id(0); const float d = vload_half(0, (__global half*) &x[i].d); @@ -108,9 +97,7 @@ __kernel void dequantize_row_q5_0(__global struct block_q5_0* x, __global float* } __kernel void dequantize_row_q5_1(__global struct block_q5_1* x, __global float* y) { - constant uint qk = 32; // QK5_1; - - const uint i = get_global_id(0) / qk; + const uint i = get_global_id(0) / 32; /* QK5_1 */ const uint j = get_local_id(0); const float d = vload_half(0, (__global half*) &x[i].d); @@ -129,8 +116,7 @@ __kernel void dequantize_row_q5_1(__global struct block_q5_1* x, __global float* } __kernel void dequantize_row_q8_0(__global struct block_q8_0* x, __global float* y) { - constant uint qk = 32; // QK8_0; - const uint i = get_global_id(0) / qk; + const uint i = get_global_id(0) / 32; /* QK8_0 */ const uint j = get_local_id(0); const float d = x[i].d; @@ -300,13 +286,13 @@ void ggml_cl_init(void) { out_of_order = CL_TRUE; queue = clCreateCommandQueue(context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err); - if (err == CL_INVALID_PROPERTY) { + if (err == CL_INVALID_PROPERTY || err == CL_INVALID_VALUE) { out_of_order = CL_FALSE; queue = clCreateCommandQueue(context, device, 0, &err); } CL_CHECK(err, "clCreateCommandQueue"); - program = build_program_from_source(context, device, clblast_dequant); + program = build_program_from_source(context, device, program_source); // Prepare dequantize kernels kernel_q4_0 = clCreateKernel(program, "dequantize_row_q4_0", &err); From 9074e353dd85e6094817c6f2024acea64579378a Mon Sep 17 00:00:00 2001 From: Henri Vasserman Date: Sun, 14 May 2023 13:15:09 +0300 Subject: [PATCH 04/15] minor nitpicks --- ggml-opencl.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/ggml-opencl.c b/ggml-opencl.c index a9ab4620fb490..176049997ebac 100644 --- a/ggml-opencl.c +++ b/ggml-opencl.c @@ -138,7 +138,6 @@ static cl_platform_id platform; static cl_device_id device; static cl_context context; static cl_command_queue queue; -static cl_bool out_of_order; static cl_program program; static cl_kernel kernel_q4_0, kernel_q4_1, kernel_q5_0, kernel_q5_1, kernel_q8_0; static cl_mem cl_buffer_a, cl_buffer_qb, cl_buffer_b, cl_buffer_c; @@ -214,11 +213,10 @@ void ggml_cl_init(void) { } } - text_buffer[0] = 0; device = NULL; char * GGML_OPENCL_DEVICE = getenv("GGML_OPENCL_DEVICE"); if (GGML_OPENCL_DEVICE != NULL) { - cl_device_id devices[16]; + cl_device_id devices[NDEV]; cl_uint num_devices; clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, NDEV, devices, &num_devices); @@ -284,10 +282,8 @@ void ggml_cl_init(void) { fprintf(stderr, "ggml_opencl: using device: '%s'\n", text_buffer); } - out_of_order = CL_TRUE; queue = clCreateCommandQueue(context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err); if (err == CL_INVALID_PROPERTY || err == CL_INVALID_VALUE) { - out_of_order = CL_FALSE; queue = clCreateCommandQueue(context, device, 0, &err); } CL_CHECK(err, "clCreateCommandQueue"); From 394dabbc1a5ac96508baa99b5ffe7c52616f24ce Mon Sep 17 00:00:00 2001 From: Henri Vasserman Date: Sun, 14 May 2023 13:22:39 +0300 Subject: [PATCH 05/15] remove qk as well --- ggml-opencl.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/ggml-opencl.c b/ggml-opencl.c index 176049997ebac..46b62a86953c3 100644 --- a/ggml-opencl.c +++ b/ggml-opencl.c @@ -60,8 +60,8 @@ __kernel void dequantize_row_q4_0(__global struct block_q4_0* x, __global float* const int x0 = (x[i].qs[j] & 0xf) - 8; const int x1 = (x[i].qs[j] >> 4) - 8; - y[i*qk + j + 0 ] = x0*d; - y[i*qk + j + qk/2] = x1*d; + y[i*32 + j + 0 ] = x0*d; + y[i*32 + j + 16] = x1*d; } __kernel void dequantize_row_q4_1(__global struct block_q4_1* x, __global float* y) { @@ -74,8 +74,8 @@ __kernel void dequantize_row_q4_1(__global struct block_q4_1* x, __global float* const int x0 = (x[i].qs[j] & 0xf); const int x1 = (x[i].qs[j] >> 4); - y[i*qk + j + 0 ] = x0*d + m; - y[i*qk + j + qk/2] = x1*d + m; + y[i*32 + j + 0 ] = x0*d + m; + y[i*32 + j + 16] = x1*d + m; } __kernel void dequantize_row_q5_0(__global struct block_q5_0* x, __global float* y) { @@ -92,8 +92,8 @@ __kernel void dequantize_row_q5_0(__global struct block_q5_0* x, __global float* const int32_t x0 = ((x[i].qs[j] & 0xf) | xh_0) - 16; const int32_t x1 = ((x[i].qs[j] >> 4) | xh_1) - 16; - y[i*qk + j + 0 ] = x0*d; - y[i*qk + j + qk/2] = x1*d; + y[i*32 + j + 0 ] = x0*d; + y[i*32 + j + 16] = x1*d; } __kernel void dequantize_row_q5_1(__global struct block_q5_1* x, __global float* y) { @@ -111,8 +111,8 @@ __kernel void dequantize_row_q5_1(__global struct block_q5_1* x, __global float* const int x0 = (x[i].qs[j] & 0xf) | xh_0; const int x1 = (x[i].qs[j] >> 4) | xh_1; - y[i*qk + j + 0 ] = x0*d + m; - y[i*qk + j + qk/2] = x1*d + m; + y[i*32 + j + 0 ] = x0*d + m; + y[i*32 + j + 16] = x1*d + m; } __kernel void dequantize_row_q8_0(__global struct block_q8_0* x, __global float* y) { @@ -120,7 +120,7 @@ __kernel void dequantize_row_q8_0(__global struct block_q8_0* x, __global float* const uint j = get_local_id(0); const float d = x[i].d; - y[i*qk + j] = x[i].qs[j]*d; + y[i*32 + j] = x[i].qs[j]*d; } ); From 9939b87cbbc903208752b3fd3a90f3ef5118507d Mon Sep 17 00:00:00 2001 From: Henri Vasserman Date: Sun, 14 May 2023 19:12:09 +0300 Subject: [PATCH 06/15] Fix Q8_0 --- ggml-opencl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ggml-opencl.c b/ggml-opencl.c index 46b62a86953c3..58a797fba9932 100644 --- a/ggml-opencl.c +++ b/ggml-opencl.c @@ -12,6 +12,7 @@ #define MULTILINE_QUOTE(...) #__VA_ARGS__ static const char * program_source = MULTILINE_QUOTE( +typedef char int8_t; typedef uchar uint8_t; typedef int int32_t; typedef uint uint32_t; @@ -47,7 +48,7 @@ struct block_q5_1 struct block_q8_0 { float d; - uint8_t qs[16]; /* QK8_0 / 2 */ + int8_t qs[32]; /* QK8_0 */ }; From 962e2a9cd9ebc6315cce3e120666280eae2b8cde Mon Sep 17 00:00:00 2001 From: Henri Vasserman Date: Fri, 19 May 2023 00:35:46 +0300 Subject: [PATCH 07/15] Added another check to find a GPU. --- ggml-opencl.c | 58 +++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 16 deletions(-) diff --git a/ggml-opencl.c b/ggml-opencl.c index 58a797fba9932..b1d49099de554 100644 --- a/ggml-opencl.c +++ b/ggml-opencl.c @@ -181,13 +181,15 @@ void ggml_cl_init(void) { char text_buffer[1024] = {0}; platform = NULL; + device = NULL; + + cl_platform_id platforms[NPLAT]; + cl_uint num_platforms; + err = clGetPlatformIDs(NPLAT, platforms, &num_platforms); + CL_CHECK(err, "clGetPlatformIDs"); + char * GGML_OPENCL_PLATFORM = getenv("GGML_OPENCL_PLATFORM"); if (GGML_OPENCL_PLATFORM != NULL) { - cl_platform_id platforms[NPLAT]; - cl_uint num_platforms; - err = clGetPlatformIDs(NPLAT, platforms, &num_platforms); - CL_CHECK(err, "clGetPlatformIDs"); - unsigned plat_num; if (sscanf(GGML_OPENCL_PLATFORM, " %u", &plat_num) == 1) { if (plat_num >= num_platforms) { @@ -214,12 +216,12 @@ void ggml_cl_init(void) { } } - device = NULL; char * GGML_OPENCL_DEVICE = getenv("GGML_OPENCL_DEVICE"); if (GGML_OPENCL_DEVICE != NULL) { cl_device_id devices[NDEV]; cl_uint num_devices; clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, NDEV, devices, &num_devices); + CL_CHECK(err, "clGetDeviceIDs"); unsigned dev_num; if (sscanf(GGML_OPENCL_DEVICE, " %u", &dev_num) == 1) { @@ -244,6 +246,30 @@ void ggml_cl_init(void) { exit(1); } else { fprintf(stderr, "ggml_opencl: selecting device: '%s'\n", text_buffer); + if (platform == NULL) { + err = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(&platform), &platform, NULL); + CL_CHECK(err, "clGetDeviceInfo"); + } + } + } + + if (platform == NULL) { + cl_device_id devices[NDEV]; + cl_uint num_devices; + + for (unsigned i = 0; i < num_platforms; i++) { + clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, NDEV, devices, &num_devices); + CL_CHECK(err, "clGetDeviceIDs"); + + if (num_devices > 0) { + platform = platforms[i]; + device = devices[0]; + if (num_devices > 1) { + fprintf(stderr, "ggml_opencl: platform has more than 1 GPU, selecting the first.\n"); + } + fprintf(stderr, "ggml_opencl: autodetected GPU.\n"); + break; + } } } @@ -263,25 +289,25 @@ void ggml_cl_init(void) { } } CL_CHECK(err, "clCreateContextFromType"); - } - if (device == NULL) { err = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(&device), &device, NULL); CL_CHECK(err, "clGetContextInfo"); + if (platform == NULL) { err = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(&platform), &platform, NULL); CL_CHECK(err, "clGetDeviceInfo"); } } - if (platform != NULL) { - clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(text_buffer), &text_buffer, NULL); - fprintf(stderr, "ggml_opencl: using platform: '%s'\n", text_buffer); - } - if (device != NULL) { - clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(text_buffer), &text_buffer, NULL); - fprintf(stderr, "ggml_opencl: using device: '%s'\n", text_buffer); - } + + GGML_ASSERT(platform != NULL); + clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(text_buffer), &text_buffer, NULL); + fprintf(stderr, "ggml_opencl: using platform: '%s'\n", text_buffer); + + GGML_ASSERT(device != NULL); + clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(text_buffer), &text_buffer, NULL); + fprintf(stderr, "ggml_opencl: using device: '%s'\n", text_buffer); + queue = clCreateCommandQueue(context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err); if (err == CL_INVALID_PROPERTY || err == CL_INVALID_VALUE) { From 772e3fbe125c8f5815108808120536aa7af8fccf Mon Sep 17 00:00:00 2001 From: Henri Vasserman Date: Fri, 19 May 2023 01:12:57 +0300 Subject: [PATCH 08/15] add packed just in case --- ggml-opencl.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ggml-opencl.c b/ggml-opencl.c index b1d49099de554..cf62bec7b651f 100644 --- a/ggml-opencl.c +++ b/ggml-opencl.c @@ -17,13 +17,13 @@ typedef uchar uint8_t; typedef int int32_t; typedef uint uint32_t; -struct block_q4_0 +struct __attribute__ ((packed)) block_q4_0 { float d; uint8_t qs[16]; /* QK4_0 / 2 */ }; -struct block_q4_1 +struct __attribute__ ((packed)) block_q4_1 { float d; float m; @@ -37,7 +37,7 @@ struct __attribute__ ((packed)) block_q5_0 uint8_t qs[16]; /* QK5_0 / 2 */ }; -struct block_q5_1 +struct __attribute__ ((packed)) block_q5_1 { half d; half m; @@ -45,7 +45,7 @@ struct block_q5_1 uint8_t qs[16]; /* QK5_1 / 2 */ }; -struct block_q8_0 +struct __attribute__ ((packed)) block_q8_0 { float d; int8_t qs[32]; /* QK8_0 */ From 35dbc8d799ce27e83e5dbbcb684f922f961c9a03 Mon Sep 17 00:00:00 2001 From: Henri Vasserman Date: Fri, 19 May 2023 13:08:57 +0300 Subject: [PATCH 09/15] wrap all CL calls in checks. --- ggml-opencl.c | 160 +++++++++++++++++++++++--------------------------- 1 file changed, 72 insertions(+), 88 deletions(-) diff --git a/ggml-opencl.c b/ggml-opencl.c index cf62bec7b651f..361dc07ed9044 100644 --- a/ggml-opencl.c +++ b/ggml-opencl.c @@ -126,13 +126,24 @@ __kernel void dequantize_row_q8_0(__global struct block_q8_0* x, __global float* ); -#define CL_CHECK(err, name) \ - do { \ - cl_int err_ = (err); \ - if (err_ != CL_SUCCESS) { \ - fprintf(stderr, "ggml_opencl: %s error %d at %s:%d\n", name, err_, __FILE__, __LINE__); \ - exit(1); \ - } \ +#define CL_CHECK(err) \ + do { \ + cl_int err_ = (err); \ + if (err_ != CL_SUCCESS) { \ + fprintf(stderr, "ggml_opencl: %s error %d at %s:%d\n", \ + #err, err_, __FILE__, __LINE__); \ + exit(1); \ + } \ + } while (0) + +#define CLBLAST_CHECK(err) \ + do { \ + CLBlastStatusCode err_ = (err); \ + if (err_ != CLBlastSuccess) { \ + fprintf(stderr, "ggml_opencl: %s error %d at %s:%d\n", \ + #err, err_, __FILE__, __LINE__); \ + exit(1); \ + } \ } while (0) static cl_platform_id platform; @@ -185,8 +196,7 @@ void ggml_cl_init(void) { cl_platform_id platforms[NPLAT]; cl_uint num_platforms; - err = clGetPlatformIDs(NPLAT, platforms, &num_platforms); - CL_CHECK(err, "clGetPlatformIDs"); + CL_CHECK(clGetPlatformIDs(NPLAT, platforms, &num_platforms)); char * GGML_OPENCL_PLATFORM = getenv("GGML_OPENCL_PLATFORM"); if (GGML_OPENCL_PLATFORM != NULL) { @@ -197,11 +207,11 @@ void ggml_cl_init(void) { exit(1); } else { platform = platforms[plat_num]; - clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(text_buffer), &text_buffer, NULL); + CL_CHECK(clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(text_buffer), &text_buffer, NULL)); } } else { for (unsigned i = 0; i < num_platforms; i++) { - clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, sizeof(text_buffer), &text_buffer, NULL); + CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, sizeof(text_buffer), &text_buffer, NULL)); if (strstr(text_buffer, GGML_OPENCL_PLATFORM) != NULL) { platform = platforms[i]; break; @@ -220,8 +230,7 @@ void ggml_cl_init(void) { if (GGML_OPENCL_DEVICE != NULL) { cl_device_id devices[NDEV]; cl_uint num_devices; - clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, NDEV, devices, &num_devices); - CL_CHECK(err, "clGetDeviceIDs"); + CL_CHECK(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, NDEV, devices, &num_devices)); unsigned dev_num; if (sscanf(GGML_OPENCL_DEVICE, " %u", &dev_num) == 1) { @@ -230,11 +239,11 @@ void ggml_cl_init(void) { exit(1); } else { device = devices[dev_num]; - clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(text_buffer), &text_buffer, NULL); + CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(text_buffer), &text_buffer, NULL)); } } else { for (unsigned i = 0; i < num_devices; i++) { - clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(text_buffer), &text_buffer, NULL); + CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(text_buffer), &text_buffer, NULL)); if (strstr(text_buffer, GGML_OPENCL_DEVICE) != NULL) { device = devices[i]; break; @@ -247,8 +256,7 @@ void ggml_cl_init(void) { } else { fprintf(stderr, "ggml_opencl: selecting device: '%s'\n", text_buffer); if (platform == NULL) { - err = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(&platform), &platform, NULL); - CL_CHECK(err, "clGetDeviceInfo"); + CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(&platform), &platform, NULL)); } } } @@ -258,8 +266,7 @@ void ggml_cl_init(void) { cl_uint num_devices; for (unsigned i = 0; i < num_platforms; i++) { - clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, NDEV, devices, &num_devices); - CL_CHECK(err, "clGetDeviceIDs"); + CL_CHECK(clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, NDEV, devices, &num_devices)); if (num_devices > 0) { platform = platforms[i]; @@ -278,24 +285,19 @@ void ggml_cl_init(void) { }; if (device != NULL) { - context = clCreateContext(properties, 1, &device, NULL, NULL, &err); - CL_CHECK(err, "clCreateContext"); + CL_CHECK((context = clCreateContext(properties, 1, &device, NULL, NULL, &err), err)); } else { - context = clCreateContextFromType(properties, CL_DEVICE_TYPE_GPU, NULL, NULL, &err); - if (err == CL_DEVICE_NOT_AVAILABLE || err == CL_DEVICE_NOT_FOUND) { - context = clCreateContextFromType(properties, CL_DEVICE_TYPE_DEFAULT, NULL, NULL, &err); - if (err == CL_DEVICE_NOT_AVAILABLE || err == CL_DEVICE_NOT_FOUND) { - context = clCreateContextFromType(properties, CL_DEVICE_TYPE_ALL, NULL, NULL, &err); - } - } - CL_CHECK(err, "clCreateContextFromType"); - - err = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(&device), &device, NULL); - CL_CHECK(err, "clGetContextInfo"); - + CL_CHECK((context = clCreateContextFromType(properties, CL_DEVICE_TYPE_GPU, NULL, NULL, &err), + (err != CL_DEVICE_NOT_AVAILABLE && err != CL_DEVICE_NOT_FOUND ? err : + (context = clCreateContextFromType(properties, CL_DEVICE_TYPE_DEFAULT, NULL, NULL, &err), + (err != CL_DEVICE_NOT_AVAILABLE && err != CL_DEVICE_NOT_FOUND ? err : + (context = clCreateContextFromType(properties, CL_DEVICE_TYPE_ALL, NULL, NULL, &err), err)) + )) + )); + + CL_CHECK(clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(&device), &device, NULL)); if (platform == NULL) { - err = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(&platform), &platform, NULL); - CL_CHECK(err, "clGetDeviceInfo"); + CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(&platform), &platform, NULL)); } } @@ -308,26 +310,19 @@ void ggml_cl_init(void) { clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(text_buffer), &text_buffer, NULL); fprintf(stderr, "ggml_opencl: using device: '%s'\n", text_buffer); - - queue = clCreateCommandQueue(context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err); - if (err == CL_INVALID_PROPERTY || err == CL_INVALID_VALUE) { - queue = clCreateCommandQueue(context, device, 0, &err); - } - CL_CHECK(err, "clCreateCommandQueue"); + CL_CHECK((queue = clCreateCommandQueue(context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err), + (err != CL_INVALID_PROPERTY && err != CL_INVALID_VALUE ? err : + (queue = clCreateCommandQueue(context, device, 0, &err), err) + ))); program = build_program_from_source(context, device, program_source); // Prepare dequantize kernels - kernel_q4_0 = clCreateKernel(program, "dequantize_row_q4_0", &err); - CL_CHECK(err, "clCreateKernel"); - kernel_q4_1 = clCreateKernel(program, "dequantize_row_q4_1", &err); - CL_CHECK(err, "clCreateKernel"); - kernel_q5_0 = clCreateKernel(program, "dequantize_row_q5_0", &err); - CL_CHECK(err, "clCreateKernel"); - kernel_q5_1 = clCreateKernel(program, "dequantize_row_q5_1", &err); - CL_CHECK(err, "clCreateKernel"); - kernel_q8_0 = clCreateKernel(program, "dequantize_row_q8_0", &err); - CL_CHECK(err, "clCreateKernel"); + CL_CHECK((kernel_q4_0 = clCreateKernel(program, "dequantize_row_q4_0", &err), err)); + CL_CHECK((kernel_q4_1 = clCreateKernel(program, "dequantize_row_q4_1", &err), err)); + CL_CHECK((kernel_q5_0 = clCreateKernel(program, "dequantize_row_q5_0", &err), err)); + CL_CHECK((kernel_q5_1 = clCreateKernel(program, "dequantize_row_q5_1", &err), err)); + CL_CHECK((kernel_q8_0 = clCreateKernel(program, "dequantize_row_q8_0", &err), err)); } static void ggml_cl_malloc(size_t req_size, size_t* cur_size, cl_mem_flags flags, cl_mem* buf) { @@ -340,9 +335,8 @@ static void ggml_cl_malloc(size_t req_size, size_t* cur_size, cl_mem_flags flags clReleaseMemObject(*buf); } cl_int err; - *buf = clCreateBuffer(context, flags, req_size, NULL, &err); + CL_CHECK((*buf = clCreateBuffer(context, flags, req_size, NULL, &err), err)); *cur_size = req_size; - CL_CHECK(err, "clCreateBuffer"); } void ggml_cl_sgemm_wrapper( @@ -351,7 +345,6 @@ void ggml_cl_sgemm_wrapper( const float alpha, const void *host_a, const int lda, const float *host_b, const int ldb, const float beta, float *host_c, const int ldc, const int btype) { - cl_int err = 0; cl_kernel kernel; size_t global = n * k, local, size_qb; @@ -411,49 +404,40 @@ void ggml_cl_sgemm_wrapper( cl_event ev_a, ev_qb, ev_b; if (dequant) { - err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &cl_buffer_qb); - err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &cl_buffer_b); - CL_CHECK(err, "clSetKernelArg"); - err = clEnqueueWriteBuffer(queue, cl_buffer_qb, CL_FALSE, 0, size_qb, host_b, 0, NULL, &ev_qb); - CL_CHECK(err, "clEnqueueWriteBuffer qb"); + CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &cl_buffer_qb)); + CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), &cl_buffer_b)); + CL_CHECK(clEnqueueWriteBuffer(queue, cl_buffer_qb, CL_FALSE, 0, size_qb, host_b, 0, NULL, &ev_qb)); } else { - err = clEnqueueWriteBuffer(queue, cl_buffer_b, CL_FALSE, 0, size_b, host_b, 0, NULL, &ev_b); - CL_CHECK(err, "clEnqueueWriteBuffer b"); + CL_CHECK(clEnqueueWriteBuffer(queue, cl_buffer_b, CL_FALSE, 0, size_b, host_b, 0, NULL, &ev_b)); } - err = clEnqueueWriteBuffer(queue, cl_buffer_a, CL_FALSE, 0, size_a, host_a, 0, NULL, &ev_a); - CL_CHECK(err, "clEnqueueWriteBuffer a"); + CL_CHECK(clEnqueueWriteBuffer(queue, cl_buffer_a, CL_FALSE, 0, size_a, host_a, 0, NULL, &ev_a)); if (dequant) { - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 1, &ev_qb, &ev_b); - CL_CHECK(err, "clEnqueueNDRangeKernel"); - clReleaseEvent(ev_qb); + CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 1, &ev_qb, &ev_b)); + CL_CHECK(clReleaseEvent(ev_qb)); } - clWaitForEvents(1, &ev_a); - clWaitForEvents(1, &ev_b); - clReleaseEvent(ev_a); - clReleaseEvent(ev_b); + CL_CHECK(clWaitForEvents(1, &ev_a)); + CL_CHECK(clWaitForEvents(1, &ev_b)); + CL_CHECK(clReleaseEvent(ev_a)); + CL_CHECK(clReleaseEvent(ev_b)); cl_event ev_sgemm; - CLBlastStatusCode status = CLBlastSgemm((CLBlastLayout)order, - (CLBlastTranspose)trans_a, (CLBlastTranspose)trans_b, - m, n, k, - alpha, - cl_buffer_a, 0, lda, - cl_buffer_b, 0, ldb, - beta, - cl_buffer_c, 0, ldc, - &queue, &ev_sgemm); - - if (status != CLBlastSuccess) { - fprintf(stderr, "Error: CLBlast SGEMM %d\n", status); - abort(); - } + CLBLAST_CHECK(CLBlastSgemm( + (CLBlastLayout)order, + (CLBlastTranspose)trans_a, (CLBlastTranspose)trans_b, + m, n, k, + alpha, + cl_buffer_a, 0, lda, + cl_buffer_b, 0, ldb, + beta, + cl_buffer_c, 0, ldc, + &queue, &ev_sgemm)); cl_event ev_c; - clEnqueueReadBuffer(queue, cl_buffer_c, CL_TRUE, 0, size_c, host_c, 1, &ev_sgemm, &ev_c); + CL_CHECK(clEnqueueReadBuffer(queue, cl_buffer_c, CL_TRUE, 0, size_c, host_c, 1, &ev_sgemm, &ev_c)); // Wait for completion - clWaitForEvents(1, &ev_c); - clReleaseEvent(ev_sgemm); - clReleaseEvent(ev_c); + CL_CHECK(clWaitForEvents(1, &ev_c)); + CL_CHECK(clReleaseEvent(ev_sgemm)); + CL_CHECK(clReleaseEvent(ev_c)); } From 6df8e9323483b681dc3efc7f5e0f4644a91d8a19 Mon Sep 17 00:00:00 2001 From: Henri Vasserman Date: Fri, 19 May 2023 23:52:35 +0300 Subject: [PATCH 10/15] update Q formats --- ggml-opencl.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/ggml-opencl.c b/ggml-opencl.c index 361dc07ed9044..c9e7418d5b07b 100644 --- a/ggml-opencl.c +++ b/ggml-opencl.c @@ -19,14 +19,14 @@ typedef uint uint32_t; struct __attribute__ ((packed)) block_q4_0 { - float d; + half d; uint8_t qs[16]; /* QK4_0 / 2 */ }; struct __attribute__ ((packed)) block_q4_1 { - float d; - float m; + half d; + half m; uint8_t qs[16]; /* QK4_1 / 2 */ }; @@ -47,7 +47,7 @@ struct __attribute__ ((packed)) block_q5_1 struct __attribute__ ((packed)) block_q8_0 { - float d; + half d; int8_t qs[32]; /* QK8_0 */ }; @@ -56,7 +56,7 @@ __kernel void dequantize_row_q4_0(__global struct block_q4_0* x, __global float* const uint i = get_global_id(0) / 32; /* QK4_0 */ const uint j = get_local_id(0); - const float d = x[i].d; + const float d = vload_half(0, (__global half*) &x[i].d); const int x0 = (x[i].qs[j] & 0xf) - 8; const int x1 = (x[i].qs[j] >> 4) - 8; @@ -69,8 +69,8 @@ __kernel void dequantize_row_q4_1(__global struct block_q4_1* x, __global float* const uint i = get_global_id(0) / 32; /* QK4_1 */ const uint j = get_local_id(0); - const float d = x[i].d; - const float m = x[i].m; + const float d = vload_half(0, (__global half*) &x[i].d); + const float m = vload_half(0, (__global half*) &x[i].m); const int x0 = (x[i].qs[j] & 0xf); const int x1 = (x[i].qs[j] >> 4); @@ -120,7 +120,7 @@ __kernel void dequantize_row_q8_0(__global struct block_q8_0* x, __global float* const uint i = get_global_id(0) / 32; /* QK8_0 */ const uint j = get_local_id(0); - const float d = x[i].d; + const float d = vload_half(0, (__global half*) &x[i].d); y[i*32 + j] = x[i].qs[j]*d; } @@ -358,13 +358,13 @@ void ggml_cl_sgemm_wrapper( dequant = true; kernel = kernel_q4_0; local = 16; - size_qb = global * (sizeof(float) + local) / 32; + size_qb = global * (sizeof(ggml_fp16_t) + local) / 32; break; case GGML_TYPE_Q4_1: dequant = true; kernel = kernel_q4_1; local = 16; - size_qb = global * (sizeof(float) * 2 + local) / 32; + size_qb = global * (sizeof(ggml_fp16_t) * 2 + local) / 32; break; case GGML_TYPE_Q5_0: dequant = true; @@ -382,7 +382,7 @@ void ggml_cl_sgemm_wrapper( dequant = true; kernel = kernel_q8_0; local = 32; - size_qb = global * (sizeof(float) + local) / 32; + size_qb = global * (sizeof(ggml_fp16_t) + local) / 32; break; default: fprintf(stderr, "Error: Unsupported OpenCL btype %d\n", btype); From e71bba90b825f094ba33171c37f89045d5829a17 Mon Sep 17 00:00:00 2001 From: Henri Vasserman Date: Sat, 20 May 2023 14:58:33 +0300 Subject: [PATCH 11/15] rewrite platform selection code. --- ggml-opencl.c | 218 ++++++++++++++++++++++++++++---------------------- 1 file changed, 123 insertions(+), 95 deletions(-) diff --git a/ggml-opencl.c b/ggml-opencl.c index c9e7418d5b07b..f5513e62fc97f 100644 --- a/ggml-opencl.c +++ b/ggml-opencl.c @@ -187,128 +187,156 @@ static cl_program build_program_from_source(cl_context ctx, cl_device_id dev, co void ggml_cl_init(void) { cl_int err = 0; + struct cl_device; + struct cl_platform { + cl_platform_id id; + unsigned number; + char name[128]; + char vendor[128]; + struct cl_device * devices; + unsigned n_devices; + struct cl_device * default_device; + }; + + struct cl_device { + struct cl_platform * platform; + cl_device_id id; + unsigned number; + cl_device_type type; + char name[128]; + }; + enum { NPLAT = 16, NDEV = 16 }; - char text_buffer[1024] = {0}; + struct cl_platform platforms[NPLAT]; + unsigned n_platforms = 0; + struct cl_device devices[NDEV]; + unsigned n_devices = 0; + struct cl_device * default_device = NULL; platform = NULL; device = NULL; - cl_platform_id platforms[NPLAT]; - cl_uint num_platforms; - CL_CHECK(clGetPlatformIDs(NPLAT, platforms, &num_platforms)); - - char * GGML_OPENCL_PLATFORM = getenv("GGML_OPENCL_PLATFORM"); - if (GGML_OPENCL_PLATFORM != NULL) { - unsigned plat_num; - if (sscanf(GGML_OPENCL_PLATFORM, " %u", &plat_num) == 1) { - if (plat_num >= num_platforms) { - fprintf(stderr, "ggml_opencl: There is no platform %d\n", plat_num); - exit(1); - } else { - platform = platforms[plat_num]; - CL_CHECK(clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(text_buffer), &text_buffer, NULL)); - } - } else { - for (unsigned i = 0; i < num_platforms; i++) { - CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, sizeof(text_buffer), &text_buffer, NULL)); - if (strstr(text_buffer, GGML_OPENCL_PLATFORM) != NULL) { - platform = platforms[i]; - break; - } + cl_platform_id platform_ids[NPLAT]; + CL_CHECK(clGetPlatformIDs(NPLAT, platform_ids, &n_platforms)); + + for (unsigned i = 0; i < n_platforms; i++) { + struct cl_platform * p = &platforms[i]; + p->number = i; + p->id = platform_ids[i]; + CL_CHECK(clGetPlatformInfo(p->id, CL_PLATFORM_NAME, sizeof(p->name), &p->name, NULL)); + CL_CHECK(clGetPlatformInfo(p->id, CL_PLATFORM_VENDOR, sizeof(p->vendor), &p->vendor, NULL)); + + cl_device_id device_ids[NDEV]; + cl_int clGetDeviceIDsError = clGetDeviceIDs(p->id, CL_DEVICE_TYPE_ALL, NDEV, device_ids, &p->n_devices); + if (clGetDeviceIDsError == CL_DEVICE_NOT_FOUND) p->n_devices = 0; + else CL_CHECK(clGetDeviceIDsError); + p->devices = p->n_devices > 0 ? &devices[n_devices] : NULL; + p->default_device = NULL; + + for (unsigned j = 0; j < p->n_devices; j++) { + struct cl_device * d = &devices[n_devices]; + d->number = n_devices++; + d->id = device_ids[j]; + d->platform = p; + CL_CHECK(clGetDeviceInfo(d->id, CL_DEVICE_NAME, sizeof(d->name), &d->name, NULL)); + CL_CHECK(clGetDeviceInfo(d->id, CL_DEVICE_TYPE, sizeof(d->type), &d->type, NULL)); + + if (p->default_device == NULL && d->type == CL_DEVICE_TYPE_GPU) { + p->default_device = d; } } - if (platform == NULL) { - fprintf(stderr, "ggml_opencl: no platform matching '%s' was found.\n", GGML_OPENCL_PLATFORM); - exit(1); - } else { - fprintf(stderr, "ggml_opencl: selecting platform: '%s'\n", text_buffer); + + if (default_device == NULL && p->default_device != NULL) { + default_device = p->default_device; } } - char * GGML_OPENCL_DEVICE = getenv("GGML_OPENCL_DEVICE"); - if (GGML_OPENCL_DEVICE != NULL) { - cl_device_id devices[NDEV]; - cl_uint num_devices; - CL_CHECK(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, NDEV, devices, &num_devices)); - - unsigned dev_num; - if (sscanf(GGML_OPENCL_DEVICE, " %u", &dev_num) == 1) { - if (dev_num >= num_devices) { - fprintf(stderr, "ggml_opencl: There is no device %d\n", dev_num); - exit(1); - } else { - device = devices[dev_num]; - CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(text_buffer), &text_buffer, NULL)); - } - } else { - for (unsigned i = 0; i < num_devices; i++) { - CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(text_buffer), &text_buffer, NULL)); - if (strstr(text_buffer, GGML_OPENCL_DEVICE) != NULL) { - device = devices[i]; - break; - } + if (n_devices == 0) { + fprintf(stderr, "ggml_opencl: could find any OpenCL devices.\n"); + exit(1); + } + + char * user_platform_string = getenv("GGML_OPENCL_PLATFORM"); + char * user_device_string = getenv("GGML_OPENCL_DEVICE"); + int user_platform_number = -1; + int user_device_number = -1; + + unsigned n; + if (user_platform_string != NULL && sscanf(user_platform_string, " %u", &n) == 1 && n < n_platforms) { + user_platform_number = (int)n; + } + if (user_device_string != NULL && sscanf(user_device_string, " %u", &n) == 1 && n < n_devices) { + user_device_number = (int)n; + } + + struct cl_device * selected_devices = devices; + unsigned n_selected_devices = n_devices; + + if (user_platform_number == -1 && user_platform_string != NULL && user_platform_string[0] != 0) { + for (unsigned i = 0; i < n_platforms; i++) { + struct cl_platform * p = &platforms[i]; + if (strstr(p->name, user_platform_string) != NULL || + strstr(p->vendor, user_platform_string) != NULL) { + user_platform_number = (int)i; + break; } } - if (device == NULL) { - fprintf(stderr, "ggml_opencl: no device matching '%s' was found.\n", GGML_OPENCL_DEVICE); + if (user_platform_number == -1) { + fprintf(stderr, "ggml_opencl: no platform matching '%s' was found.\n", user_platform_string); + exit(1); + } + } + if (user_platform_number != -1) { + struct cl_platform * p = &platforms[user_platform_number]; + selected_devices = p->devices; + n_selected_devices = p->n_devices; + default_device = p->default_device; + if (n_selected_devices == 0) { + fprintf(stderr, "ggml_opencl: selected platform '%s' does not have any devices.\n", p->name); exit(1); - } else { - fprintf(stderr, "ggml_opencl: selecting device: '%s'\n", text_buffer); - if (platform == NULL) { - CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(&platform), &platform, NULL)); - } } } - if (platform == NULL) { - cl_device_id devices[NDEV]; - cl_uint num_devices; - - for (unsigned i = 0; i < num_platforms; i++) { - CL_CHECK(clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, NDEV, devices, &num_devices)); - - if (num_devices > 0) { - platform = platforms[i]; - device = devices[0]; - if (num_devices > 1) { - fprintf(stderr, "ggml_opencl: platform has more than 1 GPU, selecting the first.\n"); - } - fprintf(stderr, "ggml_opencl: autodetected GPU.\n"); + if (user_device_number == -1 && user_device_string != NULL && user_device_string[0] != 0) { + for (unsigned i = 0; i < n_selected_devices; i++) { + struct cl_device * d = &selected_devices[i]; + if (strstr(d->name, user_device_string) != NULL) { + user_device_number = (int)i; break; } } + if (user_device_number == -1) { + fprintf(stderr, "ggml_opencl: no device matching '%s' was found.\n", user_device_string); + exit(1); + } + } + if (user_device_number != -1) { + selected_devices = &selected_devices[user_device_number]; + n_selected_devices = 1; + default_device = &selected_devices[0]; } - cl_context_properties *properties = platform == NULL ? NULL : (cl_context_properties[]){ - (intptr_t)CL_CONTEXT_PLATFORM, (intptr_t)platform, 0 - }; + GGML_ASSERT(n_selected_devices > 0); - if (device != NULL) { - CL_CHECK((context = clCreateContext(properties, 1, &device, NULL, NULL, &err), err)); - } else { - CL_CHECK((context = clCreateContextFromType(properties, CL_DEVICE_TYPE_GPU, NULL, NULL, &err), - (err != CL_DEVICE_NOT_AVAILABLE && err != CL_DEVICE_NOT_FOUND ? err : - (context = clCreateContextFromType(properties, CL_DEVICE_TYPE_DEFAULT, NULL, NULL, &err), - (err != CL_DEVICE_NOT_AVAILABLE && err != CL_DEVICE_NOT_FOUND ? err : - (context = clCreateContextFromType(properties, CL_DEVICE_TYPE_ALL, NULL, NULL, &err), err)) - )) - )); - - CL_CHECK(clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(&device), &device, NULL)); - if (platform == NULL) { - CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(&platform), &platform, NULL)); - } + if (default_device == NULL) { + default_device = &selected_devices[0]; } + fprintf(stderr, "ggml_opencl: selecting platform: '%s'\n", default_device->platform->name); + fprintf(stderr, "ggml_opencl: selecting device: '%s'\n", default_device->name); + if (default_device->type != CL_DEVICE_TYPE_GPU) { + fprintf(stderr, "ggml_opencl: warning, not a GPU: '%s'.\n", default_device->name); + } + + platform = default_device->platform->id; + device = default_device->id; - GGML_ASSERT(platform != NULL); - clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(text_buffer), &text_buffer, NULL); - fprintf(stderr, "ggml_opencl: using platform: '%s'\n", text_buffer); + cl_context_properties properties[] = { + (intptr_t)CL_CONTEXT_PLATFORM, (intptr_t)platform, 0 + }; - GGML_ASSERT(device != NULL); - clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(text_buffer), &text_buffer, NULL); - fprintf(stderr, "ggml_opencl: using device: '%s'\n", text_buffer); + CL_CHECK((context = clCreateContext(properties, 1, &device, NULL, NULL, &err), err)); CL_CHECK((queue = clCreateCommandQueue(context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err), (err != CL_INVALID_PROPERTY && err != CL_INVALID_VALUE ? err : From 4f97f73db237963b5e52985eb92e36944e3baed3 Mon Sep 17 00:00:00 2001 From: Henri Vasserman Date: Sat, 20 May 2023 15:21:38 +0300 Subject: [PATCH 12/15] fix indexing issue --- ggml-opencl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ggml-opencl.c b/ggml-opencl.c index f5513e62fc97f..1769845125996 100644 --- a/ggml-opencl.c +++ b/ggml-opencl.c @@ -302,7 +302,7 @@ void ggml_cl_init(void) { for (unsigned i = 0; i < n_selected_devices; i++) { struct cl_device * d = &selected_devices[i]; if (strstr(d->name, user_device_string) != NULL) { - user_device_number = (int)i; + user_device_number = d->number; break; } } @@ -312,7 +312,7 @@ void ggml_cl_init(void) { } } if (user_device_number != -1) { - selected_devices = &selected_devices[user_device_number]; + selected_devices = &devices[user_device_number]; n_selected_devices = 1; default_device = &selected_devices[0]; } From ad9ab0e3feb7a88a58ebca803d00ec5dc886660e Mon Sep 17 00:00:00 2001 From: Henri Vasserman Date: Sat, 20 May 2023 15:27:24 +0300 Subject: [PATCH 13/15] editorconfig fixes --- ggml-opencl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ggml-opencl.c b/ggml-opencl.c index 1769845125996..f62c4b596cc0e 100644 --- a/ggml-opencl.c +++ b/ggml-opencl.c @@ -233,7 +233,7 @@ void ggml_cl_init(void) { else CL_CHECK(clGetDeviceIDsError); p->devices = p->n_devices > 0 ? &devices[n_devices] : NULL; p->default_device = NULL; - + for (unsigned j = 0; j < p->n_devices; j++) { struct cl_device * d = &devices[n_devices]; d->number = n_devices++; @@ -269,14 +269,14 @@ void ggml_cl_init(void) { if (user_device_string != NULL && sscanf(user_device_string, " %u", &n) == 1 && n < n_devices) { user_device_number = (int)n; } - + struct cl_device * selected_devices = devices; unsigned n_selected_devices = n_devices; if (user_platform_number == -1 && user_platform_string != NULL && user_platform_string[0] != 0) { for (unsigned i = 0; i < n_platforms; i++) { struct cl_platform * p = &platforms[i]; - if (strstr(p->name, user_platform_string) != NULL || + if (strstr(p->name, user_platform_string) != NULL || strstr(p->vendor, user_platform_string) != NULL) { user_platform_number = (int)i; break; From 71ac58ae5393e59d398323cd2b50d54384c17a35 Mon Sep 17 00:00:00 2001 From: Henri Vasserman Date: Sat, 20 May 2023 15:29:26 +0300 Subject: [PATCH 14/15] make clang-tidy happy --- ggml-opencl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ggml-opencl.c b/ggml-opencl.c index f62c4b596cc0e..cf44c604d4739 100644 --- a/ggml-opencl.c +++ b/ggml-opencl.c @@ -229,8 +229,8 @@ void ggml_cl_init(void) { cl_device_id device_ids[NDEV]; cl_int clGetDeviceIDsError = clGetDeviceIDs(p->id, CL_DEVICE_TYPE_ALL, NDEV, device_ids, &p->n_devices); - if (clGetDeviceIDsError == CL_DEVICE_NOT_FOUND) p->n_devices = 0; - else CL_CHECK(clGetDeviceIDsError); + if (clGetDeviceIDsError == CL_DEVICE_NOT_FOUND) { p->n_devices = 0; } + else { CL_CHECK(clGetDeviceIDsError); } p->devices = p->n_devices > 0 ? &devices[n_devices] : NULL; p->default_device = NULL; From c29378e5a844371b5ed8ffd4a8a6ab95ba88de90 Mon Sep 17 00:00:00 2001 From: Henri Vasserman Date: Sat, 20 May 2023 16:03:25 +0300 Subject: [PATCH 15/15] clang-tidi --- ggml-opencl.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ggml-opencl.c b/ggml-opencl.c index cf44c604d4739..e26631fcfa1b6 100644 --- a/ggml-opencl.c +++ b/ggml-opencl.c @@ -229,8 +229,11 @@ void ggml_cl_init(void) { cl_device_id device_ids[NDEV]; cl_int clGetDeviceIDsError = clGetDeviceIDs(p->id, CL_DEVICE_TYPE_ALL, NDEV, device_ids, &p->n_devices); - if (clGetDeviceIDsError == CL_DEVICE_NOT_FOUND) { p->n_devices = 0; } - else { CL_CHECK(clGetDeviceIDsError); } + if (clGetDeviceIDsError == CL_DEVICE_NOT_FOUND) { + p->n_devices = 0; + } else { + CL_CHECK(clGetDeviceIDsError); + } p->devices = p->n_devices > 0 ? &devices[n_devices] : NULL; p->default_device = NULL;