Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OpenCL: Fixes for older devices. #1435

Merged
merged 19 commits into from
May 20, 2023
Merged
173 changes: 131 additions & 42 deletions ggml-opencl.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,48 +16,48 @@ typedef uchar uint8_t;
typedef int int32_t;
typedef uint uint32_t;

constant uint QK4_0 = 32;
//constant uint QK4_0 = 32;
struct block_q4_0
{
float d;
uint8_t qs[QK4_0 / 2];
uint8_t qs[16]; // QK4_0 / 2
};

constant uint QK4_1 = 32;
//constant uint QK4_1 = 32;
struct block_q4_1
{
float d;
float m;
uint8_t qs[QK4_1 / 2];
uint8_t qs[16]; // QK4_1 / 2
};

constant uint QK5_0 = 32;
//constant uint QK5_0 = 32;
struct __attribute__ ((packed)) block_q5_0
{
half d;
uint32_t qh;
uint8_t qs[QK5_0 / 2];
uint8_t qs[16]; // QK5_0 / 2
};

constant uint QK5_1 = 32;
//constant uint QK5_1 = 32;
struct block_q5_1
{
half d;
half m;
uint32_t qh;
uint8_t qs[QK5_1 / 2];
uint8_t qs[16]; // QK5_1 / 2
};

constant uint QK8_0 = 32;
//constant uint QK8_0 = 32;
struct block_q8_0
{
float d;
uint8_t qs[QK8_0];
uint8_t qs[16]; // QK8_0 / 2
};


__kernel void dequantize_row_q4_0(__global struct block_q4_0* x, __global float* y) {
constant uint qk = QK4_0;
constant uint qk = 32; // QK4_0;

const uint i = get_global_id(0) / qk;
const uint j = get_local_id(0);
Expand All @@ -72,7 +72,7 @@ __kernel void dequantize_row_q4_0(__global struct block_q4_0* x, __global float*
}

__kernel void dequantize_row_q4_1(__global struct block_q4_1* x, __global float* y) {
constant uint qk = QK4_1;
constant uint qk = 32; // QK4_1;

const uint i = get_global_id(0) / qk;
const uint j = get_local_id(0);
Expand All @@ -88,7 +88,7 @@ __kernel void dequantize_row_q4_1(__global struct block_q4_1* x, __global float*
}

__kernel void dequantize_row_q5_0(__global struct block_q5_0* x, __global float* y) {
constant uint qk = QK5_0;
constant uint qk = 32; // QK5_0;

const uint i = get_global_id(0) / qk;
const uint j = get_local_id(0);
Expand All @@ -108,7 +108,7 @@ __kernel void dequantize_row_q5_0(__global struct block_q5_0* x, __global float*
}

__kernel void dequantize_row_q5_1(__global struct block_q5_1* x, __global float* y) {
constant uint qk = QK5_1;
constant uint qk = 32; // QK5_1;

const uint i = get_global_id(0) / qk;
const uint j = get_local_id(0);
Expand All @@ -129,7 +129,7 @@ __kernel void dequantize_row_q5_1(__global struct block_q5_1* x, __global float*
}

__kernel void dequantize_row_q8_0(__global struct block_q8_0* x, __global float* y) {
constant uint qk = QK8_0;
constant uint qk = 32; // QK8_0;
const uint i = get_global_id(0) / qk;
const uint j = get_local_id(0);

Expand All @@ -143,7 +143,7 @@ __kernel void dequantize_row_q8_0(__global struct block_q8_0* x, __global float*
do { \
cl_int err_ = (err); \
if (err_ != CL_SUCCESS) { \
fprintf(stderr, "OpenCL %s error %d at %s:%d\n", name, err_, __FILE__, __LINE__); \
fprintf(stderr, "ggml_opencl: %s error %d at %s:%d\n", name, err_, __FILE__, __LINE__); \
exit(1); \
} \
} while (0)
Expand All @@ -152,6 +152,7 @@ static cl_platform_id platform;
static cl_device_id device;
static cl_context context;
static cl_command_queue queue;
static cl_bool out_of_order;
static cl_program program;
static cl_kernel kernel_q4_0, kernel_q4_1, kernel_q5_0, kernel_q5_1, kernel_q8_0;
static cl_mem cl_buffer_a, cl_buffer_qb, cl_buffer_b, cl_buffer_c;
Expand Down Expand Up @@ -188,35 +189,123 @@ static cl_program build_program_from_source(cl_context ctx, cl_device_id dev, co

void ggml_cl_init(void) {
cl_int err = 0;
char * GGML_CLBLAST_PLATFORM = getenv("GGML_CLBLAST_PLATFORM");
char * GGML_CLBLAST_DEVICE = getenv("GGML_CLBLAST_DEVICE");
int plat_num = (GGML_CLBLAST_PLATFORM == NULL ? 0 : atoi(GGML_CLBLAST_PLATFORM));
int dev_num = (GGML_CLBLAST_DEVICE == NULL ? 0 : atoi(GGML_CLBLAST_DEVICE));
printf("\nInitializing CLBlast (First Run)...");
printf("\nAttempting to use: Platform=%d, Device=%d (If invalid, program will crash)\n",plat_num,dev_num);
cl_uint num_platforms;
clGetPlatformIDs(0, NULL, &num_platforms);
cl_platform_id* platforms = (cl_platform_id*)malloc(num_platforms*sizeof(cl_platform_id));
clGetPlatformIDs(num_platforms, platforms, NULL);
platform = platforms[plat_num];
char platform_buffer[1024];
clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(platform_buffer), &platform_buffer, NULL);
cl_uint num_devices;
clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices);
cl_device_id* devices = (cl_device_id*)malloc(num_devices*sizeof(cl_device_id));
clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, num_devices, devices, NULL);
device = devices[dev_num];
char device_buffer[1024];
clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_buffer), &device_buffer, NULL);
printf("Using Platform: %s Device: %s\n", platform_buffer, device_buffer);
context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
CL_CHECK(err, "clCreateContext");

enum { NPLAT = 16, NDEV = 16 };

char text_buffer[1024] = {0};

platform = NULL;
char * GGML_OPENCL_PLATFORM = getenv("GGML_OPENCL_PLATFORM");
if (GGML_OPENCL_PLATFORM != NULL) {
cl_platform_id platforms[NPLAT];
cl_uint num_platforms;
err = clGetPlatformIDs(NPLAT, platforms, &num_platforms);
CL_CHECK(err, "clGetPlatformIDs");

unsigned plat_num;
if (sscanf(GGML_OPENCL_PLATFORM, " %u", &plat_num) == 1) {
if (plat_num >= num_platforms) {
fprintf(stderr, "ggml_opencl: There is no platform %d\n", plat_num);
exit(1);
} else {
platform = platforms[plat_num];
clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(text_buffer), &text_buffer, NULL);
}
} else {
for (unsigned i = 0; i < num_platforms; i++) {
clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, sizeof(text_buffer), &text_buffer, NULL);
if (strstr(text_buffer, GGML_OPENCL_PLATFORM) != NULL) {
platform = platforms[i];
break;
}
}
}
if (platform == NULL) {
fprintf(stderr, "ggml_opencl: no platform matching '%s' was found.\n", GGML_OPENCL_PLATFORM);
exit(1);
} else {
fprintf(stderr, "ggml_opencl: selecting platform: '%s'\n", text_buffer);
}
}

text_buffer[0] = 0;
device = NULL;
char * GGML_OPENCL_DEVICE = getenv("GGML_OPENCL_DEVICE");
if (GGML_OPENCL_DEVICE != NULL) {
cl_device_id devices[16];
cl_uint num_devices;
clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, NDEV, devices, &num_devices);

unsigned dev_num;
if (sscanf(GGML_OPENCL_DEVICE, " %u", &dev_num) == 1) {
if (dev_num >= num_devices) {
fprintf(stderr, "ggml_opencl: There is no device %d\n", dev_num);
exit(1);
} else {
device = devices[dev_num];
clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(text_buffer), &text_buffer, NULL);
}
} else {
for (unsigned i = 0; i < num_devices; i++) {
clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(text_buffer), &text_buffer, NULL);
if (strstr(text_buffer, GGML_OPENCL_DEVICE) != NULL) {
device = devices[i];
break;
}
}
}
if (device == NULL) {
fprintf(stderr, "ggml_opencl: no device matching '%s' was found.\n", GGML_OPENCL_DEVICE);
exit(1);
} else {
fprintf(stderr, "ggml_opencl: selecting device: '%s'\n", text_buffer);
}
}

cl_context_properties *properties = platform == NULL ? NULL : (cl_context_properties[]){
(intptr_t)CL_CONTEXT_PLATFORM, (intptr_t)platform, 0
};

if (device != NULL) {
context = clCreateContext(properties, 1, &device, NULL, NULL, &err);
CL_CHECK(err, "clCreateContext");
} else {
context = clCreateContextFromType(properties, CL_DEVICE_TYPE_GPU, NULL, NULL, &err);
if (err == CL_DEVICE_NOT_AVAILABLE || err == CL_DEVICE_NOT_FOUND) {
context = clCreateContextFromType(properties, CL_DEVICE_TYPE_DEFAULT, NULL, NULL, &err);
if (err == CL_DEVICE_NOT_AVAILABLE || err == CL_DEVICE_NOT_FOUND) {
context = clCreateContextFromType(properties, CL_DEVICE_TYPE_ALL, NULL, NULL, &err);
}
}
CL_CHECK(err, "clCreateContextFromType");
}

if (device == NULL) {
err = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(&device), &device, NULL);
CL_CHECK(err, "clGetContextInfo");
if (platform == NULL) {
err = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(&platform), &platform, NULL);
CL_CHECK(err, "clGetDeviceInfo");
}
}

if (platform != NULL) {
clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(text_buffer), &text_buffer, NULL);
fprintf(stderr, "ggml_opencl: using platform: '%s'\n", text_buffer);
}
if (device != NULL) {
clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(text_buffer), &text_buffer, NULL);
fprintf(stderr, "ggml_opencl: using device: '%s'\n", text_buffer);
}

out_of_order = CL_TRUE;
queue = clCreateCommandQueue(context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
if (err == CL_INVALID_PROPERTY) {
out_of_order = CL_FALSE;
queue = clCreateCommandQueue(context, device, 0, &err);
}
CL_CHECK(err, "clCreateCommandQueue");

free(platforms);
free(devices);

program = build_program_from_source(context, device, clblast_dequant);

// Prepare dequantize kernels
Expand Down