From 92a6e13a31ba052abd9062af6cb8df2a293ce661 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?= Date: Fri, 28 Apr 2023 15:40:32 +0200 Subject: [PATCH 1/8] Add Manjaro CUDA include and lib dirs to Makefile (#1212) --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 8fbb19c46cc10..f7c8dbfdc64ac 100644 --- a/Makefile +++ b/Makefile @@ -105,8 +105,8 @@ ifdef LLAMA_OPENBLAS LDFLAGS += -lopenblas endif ifdef LLAMA_CUBLAS - CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include - LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 + CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include + LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib OBJS += ggml-cuda.o NVCC = nvcc NVCCFLAGS = --forward-unknown-to-host-compiler -arch=native From 78ec543733d10a1629f984fd0302fdaa4e87fe66 Mon Sep 17 00:00:00 2001 From: Folko-Ven <71110216+Folko-Ven@users.noreply.github.com> Date: Fri, 28 Apr 2023 19:22:48 +0500 Subject: [PATCH 2/8] Correcting link to w64devkit (#1214) Correcting link to w64devkit (change seeto to skeeto). --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index be0e49e47abc5..2a20746c63b18 100644 --- a/README.md +++ b/README.md @@ -174,7 +174,7 @@ In order to build llama.cpp you have three different options. - On Windows: - 1. Download the latest fortran version of [w64devkit](https://github.com/seeto/w64devkit/releases). + 1. Download the latest fortran version of [w64devkit](https://github.com/skeeto/w64devkit/releases). 2. Extract `w64devkit` on your pc. 3. Run `w64devkit.exe`. 4. Use the `cd` command to reach the `llama.cpp` folder. 
From 7296c961d9303010a2b98379f738da2a8a55aa1b Mon Sep 17 00:00:00 2001 From: 0cc4m Date: Fri, 28 Apr 2023 16:57:16 +0200 Subject: [PATCH 3/8] ggml : add CLBlast support (#1164) * Allow use of OpenCL GPU-based BLAS using ClBlast instead of OpenBLAS for context processing * Improve ClBlast implementation, avoid recreating buffers, remove redundant transfers * Finish merge of ClBlast support * Move CLBlast implementation to separate file Add buffer reuse code (adapted from slaren's cuda implementation) * Add q4_2 and q4_3 CLBlast support, improve code * Double CLBlast speed by disabling OpenBLAS thread workaround Co-authored-by: Concedo <39025047+LostRuins@users.noreply.github.com> Co-authored-by: slaren <2141330+slaren@users.noreply.github.com> * Fix device selection env variable names * Fix cast in opencl kernels * Add CLBlast to CMakeLists.txt * Replace buffer pool with static buffers a, b, qb, c Fix compile warnings * Fix typos, use GGML_TYPE defines, improve code * Improve btype dequant kernel selection code, add error if type is unsupported * Improve code quality * Move internal stuff out of header * Use internal enums instead of CLBlast enums * Remove leftover C++ includes and defines * Make event use easier to read Co-authored-by: Henri Vasserman * Use c compiler for opencl files * Simplify code, fix include * First check error, then release event * Make globals static, fix indentation * Rename dequant kernels file to conform with other file names * Fix import cl file name --------- Co-authored-by: Concedo <39025047+LostRuins@users.noreply.github.com> Co-authored-by: slaren <2141330+slaren@users.noreply.github.com> Co-authored-by: Henri Vasserman Co-authored-by: Georgi Gerganov --- CMakeLists.txt | 19 +++- Makefile | 11 ++- ggml-opencl-dequant.cl | 84 ++++++++++++++++ ggml-opencl.c | 216 +++++++++++++++++++++++++++++++++++++++++ ggml-opencl.h | 24 +++++ ggml.c | 68 ++++++++++--- ggml.h | 3 +- llama.cpp | 2 +- 8 files changed, 411 insertions(+), 16 deletions(-) 
create mode 100644 ggml-opencl-dequant.cl create mode 100644 ggml-opencl.c create mode 100644 ggml-opencl.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 11ebe9eb66fae..5fdbeddfca443 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -67,6 +67,7 @@ endif() option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON) option(LLAMA_OPENBLAS "llama: use OpenBLAS" OFF) option(LLAMA_CUBLAS "llama: use cuBLAS" OFF) +option(LLAMA_CLBLAST "llama: use CLBlast" OFF) option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE}) option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE}) @@ -168,6 +169,21 @@ if (LLAMA_CUBLAS) endif() endif() +if (LLAMA_CLBLAST) + find_package(CLBlast) + if (CLBlast_FOUND) + message(STATUS "CLBlast found") + + set(GGML_OPENCL_SOURCES ggml-opencl.c ggml-opencl.h) + + add_compile_definitions(GGML_USE_CLBLAST) + + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} clblast) + else() + message(WARNING "CLBlast not found") + endif() +endif() + if (LLAMA_ALL_WARNINGS) if (NOT MSVC) set(c_flags @@ -307,7 +323,8 @@ endif() add_library(ggml OBJECT ggml.c ggml.h - ${GGML_CUDA_SOURCES}) + ${GGML_CUDA_SOURCES} + ${GGML_OPENCL_SOURCES}) target_include_directories(ggml PUBLIC .) 
target_compile_features(ggml PUBLIC c_std_11) # don't bump diff --git a/Makefile b/Makefile index f7c8dbfdc64ac..0715e857bc346 100644 --- a/Makefile +++ b/Makefile @@ -105,14 +105,21 @@ ifdef LLAMA_OPENBLAS LDFLAGS += -lopenblas endif ifdef LLAMA_CUBLAS - CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include - LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib + CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include + LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib OBJS += ggml-cuda.o NVCC = nvcc NVCCFLAGS = --forward-unknown-to-host-compiler -arch=native ggml-cuda.o: ggml-cuda.cu ggml-cuda.h $(NVCC) $(NVCCFLAGS) $(CXXFLAGS) -Wno-pedantic -c $< -o $@ endif +ifdef LLAMA_CLBLAST + CFLAGS += -DGGML_USE_CLBLAST + LDFLAGS += -lclblast -lOpenCL + OBJS += ggml-opencl.o +ggml-opencl.o: ggml-opencl.c ggml-opencl.h + $(CC) $(CFLAGS) -c $< -o $@ +endif ifdef LLAMA_GPROF CFLAGS += -pg CXXFLAGS += -pg diff --git a/ggml-opencl-dequant.cl b/ggml-opencl-dequant.cl new file mode 100644 index 0000000000000..191b2e57500ad --- /dev/null +++ b/ggml-opencl-dequant.cl @@ -0,0 +1,84 @@ +#define MULTILINE_QUOTE(...) 
#__VA_ARGS__ +const char * clblast_dequant = MULTILINE_QUOTE( + +struct block_q4_0 +{ + float d; + uchar qs[16]; +}; + +__kernel void dequantize_row_q4_0(__global struct block_q4_0* blocks, __global float* result) { + const uint i = get_global_id(0) / 32; + const uint l = get_local_id(0); + + const float d = blocks[i].d; + + const uchar vi = blocks[i].qs[l]; + + const uint index = i*32 + l*2; + result[index + 0] = ((vi & 0xf) - 8)*d; + result[index + 1] = ((vi >> 4) - 8)*d; +} + +struct block_q4_1 +{ + float d; + float m; + uchar qs[16]; +}; + +__kernel void dequantize_row_q4_1(__global struct block_q4_1* blocks, __global float* result) { + const uint i = get_global_id(0) / 32; + const uint l = get_local_id(0); + + const float d = blocks[i].d; + const float m = blocks[i].m; + + const uchar vi = blocks[i].qs[l]; + + const uint index = i*32 + l*2; + result[index + 0] = (vi & 0xf) * d + m; + result[index + 1] = (vi >> 4) * d + m; +} + +struct block_q4_2 +{ + ushort d; + uchar qs[8]; +}; + +__kernel void dequantize_row_q4_2(__global struct block_q4_2* blocks, __global float* result) { + const uint i = get_global_id(0) / 16; + const uint l = get_local_id(0); + + const float d = vload_half(0, (__global half*) &blocks[i].d);; + + const uchar vi = blocks[i].qs[l]; + + const uint index = i*16 + l*2; + result[index + 0] = ((vi & 0xf) - 8)*d; + result[index + 1] = ((vi >> 4) - 8)*d; +} + +struct block_q4_3 +{ + ushort d; + ushort m; + uchar qs[8]; +}; + +__kernel void dequantize_row_q4_3(__global struct block_q4_3* blocks, __global float* result) { + const uint i = get_global_id(0) / 16; + const uint l = get_local_id(0); + + const float d = vload_half(0, (__global half*) &(blocks[i].d)); + const float m = vload_half(0, (__global half*) &(blocks[i].m)); + + const uchar vi = blocks[i].qs[l]; + + const uint index = i*16 + l*2; + result[index + 0] = (vi & 0xf) * d + m; + result[index + 1] = (vi >> 4) * d + m; +} + +); diff --git a/ggml-opencl.c b/ggml-opencl.c new file mode 
100644 index 0000000000000..1d68f19ee1e78 --- /dev/null +++ b/ggml-opencl.c @@ -0,0 +1,216 @@ +#include "ggml-opencl.h" + +#define CL_TARGET_OPENCL_VERSION 110 +#include + +#include +#include + +#include "ggml.h" + +#include "ggml-opencl-dequant.cl" + +#define CL_CHECK(err, name) \ + do { \ + cl_int err_ = (err); \ + if (err_ != CL_SUCCESS) { \ + fprintf(stderr, "OpenCL %s error %d at %s:%d\n", name, err_, __FILE__, __LINE__); \ + exit(1); \ + } \ + } while (0) + +static cl_platform_id platform; +static cl_device_id device; +static cl_context context; +static cl_command_queue queue; +static cl_program program; +static cl_kernel kernel_q4_0, kernel_q4_1, kernel_q4_2, kernel_q4_3; +static cl_mem cl_buffer_a, cl_buffer_qb, cl_buffer_b, cl_buffer_c; +static size_t cl_size_a = 0, cl_size_qb = 0, cl_size_b = 0, cl_size_c = 0; + +static cl_program build_program_from_source(cl_context ctx, cl_device_id dev, const char* program_buffer) { + cl_program p; + char *program_log; + size_t program_size, log_size; + int err; + + program_size = strlen(program_buffer); + + p = clCreateProgramWithSource(ctx, 1, (const char**)&program_buffer, &program_size, &err); + if(err < 0) { + fprintf(stderr, "OpenCL error creating program"); + exit(1); + } + + err = clBuildProgram(p, 0, NULL, NULL, NULL, NULL); + if(err < 0) { + + clGetProgramBuildInfo(p, dev, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size); + program_log = (char*) malloc(log_size + 1); + program_log[log_size] = '\0'; + clGetProgramBuildInfo(p, dev, CL_PROGRAM_BUILD_LOG, log_size + 1, program_log, NULL); + printf("%s\n", program_log); + free(program_log); + exit(1); + } + + return p; +} + +void ggml_cl_init(void) { + cl_int err = 0; + char * GGML_CLBLAST_PLATFORM = getenv("GGML_CLBLAST_PLATFORM"); + char * GGML_CLBLAST_DEVICE = getenv("GGML_CLBLAST_DEVICE"); + int plat_num = (GGML_CLBLAST_PLATFORM == NULL ? 0 : atoi(GGML_CLBLAST_PLATFORM)); + int dev_num = (GGML_CLBLAST_DEVICE == NULL ? 
0 : atoi(GGML_CLBLAST_DEVICE)); + printf("\nInitializing CLBlast (First Run)..."); + printf("\nAttempting to use: Platform=%d, Device=%d (If invalid, program will crash)\n",plat_num,dev_num); + cl_uint num_platforms; + clGetPlatformIDs(0, NULL, &num_platforms); + cl_platform_id* platforms = (cl_platform_id*)malloc(num_platforms*sizeof(cl_platform_id)); + clGetPlatformIDs(num_platforms, platforms, NULL); + platform = platforms[plat_num]; + char platform_buffer[1024]; + clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(platform_buffer), &platform_buffer, NULL); + cl_uint num_devices; + clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices); + cl_device_id* devices = (cl_device_id*)malloc(num_devices*sizeof(cl_device_id)); + clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, num_devices, devices, NULL); + device = devices[dev_num]; + char device_buffer[1024]; + clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_buffer), &device_buffer, NULL); + printf("Using Platform: %s Device: %s\n", platform_buffer, device_buffer); + context = clCreateContext(NULL, 1, &device, NULL, NULL, &err); + CL_CHECK(err, "clCreateContext"); + queue = clCreateCommandQueue(context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err); + CL_CHECK(err, "clCreateCommandQueue"); + + free(platforms); + free(devices); + + program = build_program_from_source(context, device, clblast_dequant); + + // Prepare dequantize kernels + kernel_q4_0 = clCreateKernel(program, "dequantize_row_q4_0", &err); + CL_CHECK(err, "clCreateKernel"); + kernel_q4_1 = clCreateKernel(program, "dequantize_row_q4_1", &err); + CL_CHECK(err, "clCreateKernel"); + kernel_q4_2 = clCreateKernel(program, "dequantize_row_q4_2", &err); + CL_CHECK(err, "clCreateKernel"); + kernel_q4_3 = clCreateKernel(program, "dequantize_row_q4_3", &err); + CL_CHECK(err, "clCreateKernel"); +} + +static void ggml_cl_malloc(size_t req_size, size_t* cur_size, cl_mem_flags flags, cl_mem* buf) { + if (req_size <= *cur_size) { + return; + } + + 
// Reallocate buffer with enough space + if (*cur_size > 0) { + clReleaseMemObject(*buf); + } + cl_int err; + *buf = clCreateBuffer(context, flags, req_size, NULL, &err); + *cur_size = req_size; + CL_CHECK(err, "clCreateBuffer"); +} + +void ggml_cl_sgemm_wrapper( + const enum ggml_blas_order order, const enum ggml_blas_op trans_a, const enum ggml_blas_op trans_b, + const int m, const int n, const int k, + const float alpha, const void *host_a, const int lda, + const float *host_b, const int ldb, const float beta, + float *host_c, const int ldc, const int btype) { + cl_int err = 0; + + cl_kernel kernel; + size_t global = n * k, local, size_qb; + bool dequant; + + switch (btype) { + case GGML_TYPE_F32: + dequant = false; + break; + case GGML_TYPE_Q4_0: + dequant = true; + kernel = kernel_q4_0; + local = 16; + size_qb = global * (sizeof(float) + local) / 32; + break; + case GGML_TYPE_Q4_1: + dequant = true; + kernel = kernel_q4_1; + local = 16; + size_qb = global * (sizeof(float) * 2 + local) / 32; + break; + case GGML_TYPE_Q4_2: + dequant = true; + kernel = kernel_q4_2; + local = 8; + size_qb = global * (sizeof(short) + local) / 16; + break; + case GGML_TYPE_Q4_3: + dequant = true; + kernel = kernel_q4_3; + local = 8; + size_qb = global * (sizeof(short) * 2 + local) / 16; + break; + default: + fprintf(stderr, "Error: Unsupported OpenCL btype %d\n", btype); + abort(); + } + + const size_t size_a = m * k * sizeof(float); + const size_t size_b = n * k * sizeof(float); + const size_t size_c = m * n * sizeof(float); + + // Prepare buffers + ggml_cl_malloc(size_a, &cl_size_a, CL_MEM_READ_ONLY, &cl_buffer_a); + if (dequant) { + ggml_cl_malloc(size_qb, &cl_size_qb, CL_MEM_READ_ONLY, &cl_buffer_qb); + } + ggml_cl_malloc(size_b, &cl_size_b, CL_MEM_READ_WRITE, &cl_buffer_b); + ggml_cl_malloc(size_c, &cl_size_c, CL_MEM_WRITE_ONLY, &cl_buffer_c); + + cl_event ev_a, ev_qb, ev_b; + + if (dequant) { + err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &cl_buffer_qb); + err |= 
clSetKernelArg(kernel, 1, sizeof(cl_mem), &cl_buffer_b); + CL_CHECK(err, "clSetKernelArg"); + clEnqueueWriteBuffer(queue, cl_buffer_qb, CL_FALSE, 0, size_qb, host_b, 0, NULL, &ev_qb); + } else { + clEnqueueWriteBuffer(queue, cl_buffer_b, CL_FALSE, 0, size_b, host_b, 0, NULL, &ev_b); + } + + clEnqueueWriteBuffer(queue, cl_buffer_a, CL_FALSE, 0, size_a, host_a, 0, NULL, &ev_a); + if (dequant) { + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 1, &ev_qb, &ev_b); + CL_CHECK(err, "clEnqueueNDRangeKernel"); + clReleaseEvent(ev_qb); + } + clWaitForEvents(1, &ev_a); + clWaitForEvents(1, &ev_b); + clReleaseEvent(ev_a); + clReleaseEvent(ev_b); + + cl_event ev_sgemm; + CLBlastSgemm((CLBlastLayout)order, + (CLBlastTranspose)trans_a, (CLBlastTranspose)trans_b, + m, n, k, + alpha, + cl_buffer_a, 0, lda, + cl_buffer_b, 0, ldb, + beta, + cl_buffer_c, 0, ldc, + &queue, &ev_sgemm); + + cl_event ev_c; + clEnqueueReadBuffer(queue, cl_buffer_c, CL_TRUE, 0, size_c, host_c, 1, &ev_sgemm, &ev_c); + + // Wait for completion + clWaitForEvents(1, &ev_c); + clReleaseEvent(ev_sgemm); + clReleaseEvent(ev_c); +} diff --git a/ggml-opencl.h b/ggml-opencl.h new file mode 100644 index 0000000000000..7bcc603ef8432 --- /dev/null +++ b/ggml-opencl.h @@ -0,0 +1,24 @@ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +void ggml_cl_init(void); + +enum ggml_blas_order { + GGML_BLAS_ORDER_ROW_MAJOR = 101, + GGML_BLAS_ORDER_COLUMN_MAJOR = 102, +}; + +enum ggml_blas_op { + GGML_BLAS_OP_N = 111, + GGML_BLAS_OP_T = 112, + GGML_BLAS_OP_C = 113, +}; + +void ggml_cl_sgemm_wrapper(const enum ggml_blas_order order, const enum ggml_blas_op trans_a, const enum ggml_blas_op trans_b, const int m, const int n, const int k, const float alpha, const void *host_a, const int lda, const float *host_b, const int ldb, const float beta, float *host_c, const int ldc, const int btype); + +#ifdef __cplusplus +} +#endif diff --git a/ggml.c b/ggml.c index 1fbf2955d6730..33fb1681eaec4 100644 --- 
a/ggml.c +++ b/ggml.c @@ -149,6 +149,8 @@ inline static void* ggml_aligned_malloc(size_t size) { #include #elif defined(GGML_USE_CUBLAS) #include "ggml-cuda.h" +#elif defined(GGML_USE_CLBLAST) +#include "ggml-opencl.h" #endif #undef MIN @@ -4363,6 +4365,8 @@ struct ggml_context * ggml_init(struct ggml_init_params params) { // initialize cuBLAS #if defined(GGML_USE_CUBLAS) ggml_init_cublas(); + #elif defined(GGML_USE_CLBLAST) + ggml_cl_init(); #endif is_first_call = false; @@ -8104,7 +8108,7 @@ static void ggml_compute_forward_rms_norm( // ggml_compute_forward_mul_mat -#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUBLAS) +#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST) // helper function to determine if it is better to use BLAS or not // for large matrices, BLAS is faster static bool ggml_compute_forward_mul_mat_use_blas( @@ -8129,6 +8133,7 @@ static bool ggml_compute_forward_mul_mat_use_blas( return false; } + #endif static void ggml_compute_forward_mul_mat_f32( @@ -8144,7 +8149,7 @@ static void ggml_compute_forward_mul_mat_f32( const int64_t ne02 = src0->ne[2]; const int64_t ne03 = src0->ne[3]; -#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUBLAS) +#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST) const int64_t ne10 = src1->ne[0]; #endif const int64_t ne11 = src1->ne[1]; @@ -8201,7 +8206,7 @@ static void ggml_compute_forward_mul_mat_f32( // nb01 >= nb00 - src0 is not transposed // compute by src0 rows -#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUBLAS) +#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST) if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) { if (params->ith != 0) { return; @@ -8250,8 +8255,15 @@ static void 
ggml_compute_forward_mul_mat_f32( // copy data to host CUDA_CHECK(cudaMemcpyAsync(d, d_D, sizeof(float) * d_ne, cudaMemcpyDeviceToHost, g_cudaStream)); -#else +#elif defined(GGML_USE_CLBLAST) // zT = y * xT + ggml_cl_sgemm_wrapper(GGML_BLAS_ORDER_ROW_MAJOR, GGML_BLAS_OP_N, GGML_BLAS_OP_T, + ne11, ne01, ne10, + 1.0f, y, ne10, + x, ne10, + 0.0f, d, ne01, + GGML_TYPE_F32); +#else cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans, ne11, ne01, ne10, 1.0f, y, ne10, @@ -8395,7 +8407,7 @@ static void ggml_compute_forward_mul_mat_f16_f32( // nb01 >= nb00 - src0 is not transposed // compute by src0 rows -#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUBLAS) +#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST) if (ggml_compute_forward_mul_mat_use_blas(src0, src1, dst)) { GGML_ASSERT(nb10 == sizeof(float)); @@ -8472,6 +8484,19 @@ static void ggml_compute_forward_mul_mat_f16_f32( // copy data to host CUDA_CHECK(cudaMemcpyAsync(d, d_D, sizeof(float) * d_ne, cudaMemcpyDeviceToHost, g_cudaStream)); +#elif defined(GGML_USE_CLBLAST) + const float * x = wdata; + const float * y = (float *) ((char *) src1->data + i02*nb12 + i03*nb13); + + float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3); + + // zT = y * xT + ggml_cl_sgemm_wrapper(GGML_BLAS_ORDER_ROW_MAJOR, GGML_BLAS_OP_N, GGML_BLAS_OP_T, + ne11, ne01, ne10, + 1.0f, y, ne10, + x, ne10, + 0.0f, d, ne01, + GGML_TYPE_F32); #else const float * x = wdata; const float * y = (float *) ((char *) src1->data + i02*nb12 + i03*nb13); @@ -8646,7 +8671,7 @@ static void ggml_compute_forward_mul_mat_q_f32( // nb01 >= nb00 - src0 is not transposed // compute by src0 rows -#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUBLAS) +#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST) if (ggml_compute_forward_mul_mat_use_blas(src0, src1, 
dst)) { if (params->ith != 0) { return; @@ -8698,7 +8723,7 @@ static void ggml_compute_forward_mul_mat_q_f32( else { GGML_ASSERT(false); } -#else +#elif !defined(GGML_USE_CLBLAST) float * const wdata = params->wdata; dequantize_row_q_t const dequantize_row_q = quantize_fns[type].dequantize_row_q; #endif @@ -8717,6 +8742,8 @@ static void ggml_compute_forward_mul_mat_q_f32( dequantize_row_q_cuda(d_Q, d_X, ne01 * ne00, g_cudaStream); CUDA_CHECK(cudaGetLastError()); +#elif defined(GGML_USE_CLBLAST) + const void* x = (char *) src0->data + i03*nb03 + i02*nb02; #else { size_t id = 0; @@ -8743,8 +8770,15 @@ static void ggml_compute_forward_mul_mat_q_f32( // copy data to host CUDA_CHECK(cudaMemcpyAsync(d, d_D, sizeof(float) * d_ne, cudaMemcpyDeviceToHost, g_cudaStream)); -#else +#elif defined(GGML_USE_CLBLAST) // zT = y * xT + ggml_cl_sgemm_wrapper(GGML_BLAS_ORDER_ROW_MAJOR, GGML_BLAS_OP_N, GGML_BLAS_OP_T, + ne11, ne01, ne10, + 1.0f, y, ne10, + x, ne10, + 0.0f, d, ne01, + type); +#else cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans, ne11, ne01, ne10, 1.0f, y, ne10, @@ -11583,7 +11617,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) size_t cur = 0; if (node->src0->type == GGML_TYPE_F16 && node->src1->type == GGML_TYPE_F32) { -#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUBLAS) +#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST) if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) { node->n_tasks = 1; // TODO: this actually is doing nothing // the threads are still spinning @@ -11600,7 +11634,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) } else if (node->src0->type == GGML_TYPE_F32 && node->src1->type == GGML_TYPE_F32) { cur = 0; } else if (ggml_is_quantized(node->src0->type) && node->src1->type == GGML_TYPE_F32) { -#if defined(GGML_USE_ACCELERATE) || 
defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUBLAS) +#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST) if (ggml_compute_forward_mul_mat_use_blas(node->src0, node->src1, node)) { node->n_tasks = 1; cur = GGML_TYPE_SIZE[GGML_TYPE_F32]*(node->src0->ne[0]*node->src0->ne[1]); @@ -13100,7 +13134,7 @@ int ggml_cpu_has_wasm_simd(void) { } int ggml_cpu_has_blas(void) { -#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUBLAS) +#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST) return 1; #else return 0; @@ -13115,6 +13149,18 @@ int ggml_cpu_has_cublas(void) { #endif } +int ggml_cpu_has_clblast(void) { +#if defined(GGML_USE_CLBLAST) + return 1; +#else + return 0; +#endif +} + +int ggml_cpu_has_gpublas(void) { + return ggml_cpu_has_cublas() || ggml_cpu_has_clblast(); +} + int ggml_cpu_has_sse3(void) { #if defined(__SSE3__) return 1; diff --git a/ggml.h b/ggml.h index d9d3d214e84e7..1bbe2db93f5d1 100644 --- a/ggml.h +++ b/ggml.h @@ -858,10 +858,11 @@ extern "C" { GGML_API int ggml_cpu_has_wasm_simd (void); GGML_API int ggml_cpu_has_blas (void); GGML_API int ggml_cpu_has_cublas (void); + GGML_API int ggml_cpu_has_clblast (void); + GGML_API int ggml_cpu_has_gpublas (void); GGML_API int ggml_cpu_has_sse3 (void); GGML_API int ggml_cpu_has_vsx (void); - // // Internal types and functions exposed for tests and benchmarks // diff --git a/llama.cpp b/llama.cpp index 28a74b514b852..bfebf14bfde3f 100644 --- a/llama.cpp +++ b/llama.cpp @@ -1085,7 +1085,7 @@ static bool llama_eval_internal( // for big prompts, if BLAS is enabled, it is better to use only one thread // otherwise, the threads are spin-lock waiting for the BLAS calls and are degrading the performance ggml_cgraph gf = {}; - gf.n_threads = N >= 32 && ggml_cpu_has_blas() && !ggml_cpu_has_cublas() ? 
1 : n_threads; + gf.n_threads = N >= 32 && ggml_cpu_has_blas() && !ggml_cpu_has_gpublas() ? 1 : n_threads; struct ggml_tensor * embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N); memcpy(embd->data, tokens, N*ggml_element_size(embd)); From 11d902364b0e3b503a02a4e757ee2dc38aacb68f Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Fri, 28 Apr 2023 17:58:44 +0300 Subject: [PATCH 4/8] ggml : add helper debug printf in soft_max --- ggml.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ggml.c b/ggml.c index 33fb1681eaec4..44293dac92668 100644 --- a/ggml.c +++ b/ggml.c @@ -9257,6 +9257,7 @@ static void ggml_compute_forward_soft_max_f32( uint16_t scvt; for (int i = 0; i < nc; i++) { + //printf("p[%3d] = %8.4f\n", i, p[i]); if (p[i] == -INFINITY) { p[i] = 0.0f; } else { From 1481a9cf25ea2e4abef6b13a57660a35f3e66af1 Mon Sep 17 00:00:00 2001 From: Evan Jones Date: Fri, 28 Apr 2023 11:59:37 -0400 Subject: [PATCH 5/8] llama : add session file format and saved sessions in main (#1169) --- examples/chat-13B.sh | 4 +- examples/common.cpp | 7 ++++ examples/common.h | 1 + examples/main/main.cpp | 89 ++++++++++++++++++++++++++++++++++++++++++ llama.cpp | 53 +++++++++++++++++++++++++ llama.h | 4 ++ 6 files changed, 156 insertions(+), 2 deletions(-) diff --git a/examples/chat-13B.sh b/examples/chat-13B.sh index 4265d7b662427..2fac377840be4 100755 --- a/examples/chat-13B.sh +++ b/examples/chat-13B.sh @@ -31,8 +31,6 @@ The transcript only includes text, it does not include markup like HTML and Mark $USER_NAME: Hello, $AI_NAME! $AI_NAME: Hello $USER_NAME! How may I help you today? -$USER_NAME: What time is it? -$AI_NAME: It is $(date +%H:%M). $USER_NAME: What year is it? $AI_NAME: We are in $(date +%Y). $USER_NAME: Please tell me the largest city in Europe. @@ -50,4 +48,6 @@ $AI_NAME: The arguments are stored in process.argv. argv[3] is the second argument passed to the script and so on. $USER_NAME: Name a color. $AI_NAME: Blue +$USER_NAME: What time is it? 
+$AI_NAME: It is $(date +%H:%M). $USER_NAME:" "$@" diff --git a/examples/common.cpp b/examples/common.cpp index c0e87eb9f4585..9f10dc268558b 100644 --- a/examples/common.cpp +++ b/examples/common.cpp @@ -61,6 +61,12 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { break; } params.prompt = argv[i]; + } else if (arg == "--session") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.path_session = argv[i]; } else if (arg == "-f" || arg == "--file") { if (++i >= argc) { invalid_param = true; @@ -228,6 +234,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { fprintf(stderr, " -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads); fprintf(stderr, " -p PROMPT, --prompt PROMPT\n"); fprintf(stderr, " prompt to start generation with (default: empty)\n"); + fprintf(stderr, " --session FNAME file to cache model state in (may be large!) (default: none)\n"); fprintf(stderr, " --random-prompt start with a randomized prompt.\n"); fprintf(stderr, " --in-prefix STRING string to prefix user inputs with (default: empty)\n"); fprintf(stderr, " -f FNAME, --file FNAME\n"); diff --git a/examples/common.h b/examples/common.h index 6f26b514da1ce..9d3697d793eff 100644 --- a/examples/common.h +++ b/examples/common.h @@ -31,6 +31,7 @@ struct gpt_params { std::string model = "models/lamma-7B/ggml-model.bin"; // model path std::string prompt = ""; + std::string path_session = ""; // path to file for saving/loading model eval state std::string input_prefix = ""; // string to prefix user inputs with std::vector antiprompt; // string upon seeing which more user input is prompted diff --git a/examples/main/main.cpp b/examples/main/main.cpp index f9c9e9d98fd86..fda65574fad7a 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -157,6 +157,32 @@ int main(int argc, char ** argv) { // Add a space in front of the first character to match OG llama tokenizer behavior 
params.prompt.insert(0, 1, ' '); + std::string path_session = params.path_session; + std::vector session_tokens; + + if (!path_session.empty()) { + fprintf(stderr, "%s: attempting to load saved session from %s..\n", __func__, path_session.c_str()); + + // REVIEW - fopen to check for existing session + FILE * fp = std::fopen(path_session.c_str(), "rb"); + if (fp != NULL) { + std::fclose(fp); + + session_tokens.resize(params.n_ctx); + size_t n_token_count_out = 0; + const size_t n_session_bytes = llama_load_session_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.capacity(), &n_token_count_out); + session_tokens.resize(n_token_count_out); + + if (n_session_bytes > 0) { + fprintf(stderr, "%s: loaded %zu bytes of session data!\n", __func__, n_session_bytes); + } else { + fprintf(stderr, "%s: could not load session file, will recreate\n", __func__); + } + } else { + fprintf(stderr, "%s: session file does not exist, will create\n", __func__); + } + } + // tokenize the prompt auto embd_inp = ::llama_tokenize(ctx, params.prompt, true); @@ -167,6 +193,26 @@ int main(int argc, char ** argv) { return 1; } + // debug message about similarity of saved session, if applicable + size_t n_matching_session_tokens = 0; + if (session_tokens.size()) { + for (llama_token id : session_tokens) { + if (n_matching_session_tokens >= embd_inp.size() || id != embd_inp[n_matching_session_tokens]) { + break; + } + n_matching_session_tokens++; + } + if (n_matching_session_tokens >= embd_inp.size()) { + fprintf(stderr, "%s: session file has exact match for prompt!\n", __func__); + } else if (n_matching_session_tokens < (embd_inp.size() / 2)) { + fprintf(stderr, "%s: warning: session file has low similarity to prompt (%zu / %zu tokens); will mostly be reevaluated\n", + __func__, n_matching_session_tokens, embd_inp.size()); + } else { + fprintf(stderr, "%s: session file matches %zu / %zu tokens of prompt\n", + __func__, n_matching_session_tokens, embd_inp.size()); + } + } + // 
number of tokens to keep when resetting context if (params.n_keep < 0 || params.n_keep > (int)embd_inp.size() || params.instruct) { params.n_keep = (int)embd_inp.size(); @@ -252,9 +298,16 @@ int main(int argc, char ** argv) { bool is_antiprompt = false; bool input_noecho = false; + // HACK - because session saving incurs a non-negligible delay, for now skip re-saving session + // if we loaded a session with at least 75% similarity. It's currently just used to speed up the + // initial prompt so it doesn't need to be an exact match. + bool need_to_save_session = !path_session.empty() && n_matching_session_tokens < (embd_inp.size() * 3 / 4); + + int n_past = 0; int n_remain = params.n_predict; int n_consumed = 0; + int n_session_consumed = 0; // the first thing we will do is to output the prompt, so set color accordingly set_console_color(con_st, CONSOLE_COLOR_PROMPT); @@ -276,6 +329,9 @@ int main(int argc, char ** argv) { // insert n_left/2 tokens at the start of embd from last_n_tokens embd.insert(embd.begin(), last_n_tokens.begin() + n_ctx - n_left/2 - embd.size(), last_n_tokens.end() - embd.size()); + // REVIEW - stop saving session if we run out of context + path_session = ""; + //printf("\n---\n"); //printf("resetting: '"); //for (int i = 0; i < (int) embd.size(); i++) { @@ -285,6 +341,28 @@ int main(int argc, char ** argv) { //printf("\n---\n"); } + // try to reuse a matching prefix from the loaded session instead of re-eval (via n_past) + // REVIEW + if (n_session_consumed < (int) session_tokens.size()) { + size_t i = 0; + for ( ; i < embd.size(); i++) { + if (embd[i] != session_tokens[n_session_consumed]) { + session_tokens.resize(n_session_consumed); + break; + } + + n_past++; + n_session_consumed++; + + if (n_session_consumed >= (int) session_tokens.size()) { + break; + } + } + if (i > 0) { + embd.erase(embd.begin(), embd.begin() + i); + } + } + // evaluate tokens in batches // embd is typically prepared beforehand to fit within a batch, but not always for 
(int i = 0; i < (int) embd.size(); i += params.n_batch) { @@ -298,6 +376,11 @@ int main(int argc, char ** argv) { } n_past += n_eval; } + + if (embd.size() > 0 && !path_session.empty()) { + session_tokens.insert(session_tokens.end(), embd.begin(), embd.end()); + n_session_consumed = session_tokens.size(); + } } embd.clear(); @@ -309,6 +392,12 @@ int main(int argc, char ** argv) { const float temp = params.temp; const float repeat_penalty = params.repeat_penalty; + // optionally save the session on first sample (for faster prompt loading next time) + if (!path_session.empty() && need_to_save_session) { + need_to_save_session = false; + llama_save_session_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.size()); + } + llama_token id = 0; { diff --git a/llama.cpp b/llama.cpp index bfebf14bfde3f..dca017db62503 100644 --- a/llama.cpp +++ b/llama.cpp @@ -2431,3 +2431,56 @@ std::vector>& llama_internal_get_te return ctx->model.tensors_by_name; } +size_t llama_load_session_file(struct llama_context * ctx, const char * path_session, llama_token * tokens_out, size_t n_token_capacity, size_t * n_token_count_out) { + // TODO leverage mmap + llama_file file(path_session, "rb"); + const uint32_t magic = file.read_u32(); + const uint32_t version = file.read_u32(); + + if (!(magic == 'ggsn' && version == 0)) { + fprintf(stderr, "%s : unknown (magic, version) for session file: %08x, %08x\n", __func__, magic, version); + return 0; + } + + llama_hparams session_hparams; + file.read_raw(&session_hparams, sizeof(llama_hparams)); + + // REVIEW + if (session_hparams != ctx->model.hparams) { + fprintf(stderr, "%s : model hparams didn't match from session file!\n", __func__); + return 0; + } + + const uint32_t n_token_count = file.read_u32(); + LLAMA_ASSERT(n_token_capacity >= n_token_count); + file.read_raw(tokens_out, sizeof(llama_token) * n_token_count); + *n_token_count_out = n_token_count; + + const size_t n_state_size = file.size - file.tell(); + const size_t 
n_orig_state_size = llama_get_state_size(ctx); + if (n_state_size != n_orig_state_size) { + fprintf(stderr, "%s : failed to validate state size\n", __func__); + } + std::unique_ptr state_data(new uint8_t[n_state_size]); + file.read_raw(state_data.get(), n_state_size); + return llama_set_state_data(ctx, state_data.get()); +} + +size_t llama_save_session_file(struct llama_context * ctx, const char * path_session, const llama_token * tokens, size_t n_token_count) { + // TODO save temp & swap + llama_file file(path_session, "wb"); + + const size_t n_state_size = llama_get_state_size(ctx); + std::unique_ptr state_data(new uint8_t[n_state_size]); + llama_copy_state_data(ctx, state_data.get()); + + file.write_u32('ggsn'); // magic + file.write_u32(0); // version + file.write_raw(&ctx->model.hparams, sizeof(llama_hparams)); + + file.write_u32((uint32_t) n_token_count); // REVIEW + file.write_raw(tokens, sizeof(llama_token) * n_token_count); + + file.write_raw(state_data.get(), n_state_size); + return n_state_size; // REVIEW +} diff --git a/llama.h b/llama.h index 17dac0689fbb5..86a7d279a9ef4 100644 --- a/llama.h +++ b/llama.h @@ -133,6 +133,10 @@ extern "C" { // Returns the number of bytes read LLAMA_API size_t llama_set_state_data(struct llama_context * ctx, const uint8_t * src); + // Save/load session file + LLAMA_API size_t llama_load_session_file(struct llama_context * ctx, const char * path_session, llama_token * tokens_out, size_t n_token_capacity, size_t * n_token_count_out); + LLAMA_API size_t llama_save_session_file(struct llama_context * ctx, const char * path_session, const llama_token * tokens, size_t n_token_count); + // Run the llama inference to obtain the logits and probabilities for the next token. 
// tokens + n_tokens is the provided batch of new tokens to process // n_past is the number of tokens to use from previous eval calls From 5fba3c016bfd1d73a070e7c93dac14162ce118d0 Mon Sep 17 00:00:00 2001 From: CRD716 Date: Fri, 28 Apr 2023 11:13:33 -0500 Subject: [PATCH 6/8] examples : add Jeopardy example (#1168) * Basic Setup * Prevent Results.txt from coming up * Prefixes, Line separators, etc * editorcheck * introduction to give more consistent results * Basic graph thing * Grading, ready for testing! * Y'all ready to get funky? * fix column removal stuff * missed a few --- .gitignore | 2 + examples/jeopardy/README.md | 21 +++++++ examples/jeopardy/graph.py | 56 +++++++++++++++++ examples/jeopardy/jeopardy.sh | 30 ++++++++++ examples/jeopardy/qasheet.csv | 103 ++++++++++++++++++++++++++++++++ examples/jeopardy/questions.txt | 100 +++++++++++++++++++++++++++++++ 6 files changed, 312 insertions(+) create mode 100644 examples/jeopardy/README.md create mode 100644 examples/jeopardy/graph.py create mode 100644 examples/jeopardy/jeopardy.sh create mode 100644 examples/jeopardy/qasheet.csv create mode 100644 examples/jeopardy/questions.txt diff --git a/.gitignore b/.gitignore index c7573bb3b93c4..54dcebc4ddb7f 100644 --- a/.gitignore +++ b/.gitignore @@ -41,3 +41,5 @@ zig-out/ zig-cache/ ppl-*.txt + +examples/jeopardy/results.txt diff --git a/examples/jeopardy/README.md b/examples/jeopardy/README.md new file mode 100644 index 0000000000000..4c42e3cdbf526 --- /dev/null +++ b/examples/jeopardy/README.md @@ -0,0 +1,21 @@ +# llama.cpp/example/jeopardy + +This is pretty much just a straight port of aigoopy/llm-jeopardy/ with an added graph viewer. + +The jeopardy test can be used to compare the factual knowledge of different models against each other. This is in contrast to some other tests, which test logical deduction, creativity, writing skills, etc. 
+ + +Step 1: Open jeopardy.sh and modify the following: +``` +MODEL=(path to your model) +MODEL_NAME=(name of your model) +prefix=(basically, if you use vicuna it's Human: , if you use something else it might be User: , etc) +opts=(add --instruct here if needed for your model, or anything else you want to test out) +``` +Step 2: Run `jeopardy.sh` from the llama.cpp folder + +Step 3: Repeat steps 1 and 2 until you have all the results you need. + +Step 4: Run `graph.py`, and follow the instructions. At the end, it will generate your final graph. + +Note: The Human bar is based off of the full, original 100 sample questions. If you modify the question count or questions, it will not be valid. diff --git a/examples/jeopardy/graph.py b/examples/jeopardy/graph.py new file mode 100644 index 0000000000000..d00b2865263bb --- /dev/null +++ b/examples/jeopardy/graph.py @@ -0,0 +1,56 @@ +import matplotlib.pyplot as plt +import sys, os +import csv + +labels = [] +numbers = [] +numEntries = 1 + +rows = [] + +def bar_chart(numbers, labels, pos): + plt.bar(pos, numbers, color='blue') + plt.xticks(ticks=pos, labels=labels) + plt.title("Jeopardy Results by Model") + plt.xlabel("Model") + plt.ylabel("Questions Correct") + plt.show() + +def calculatecorrect(): + directory = os.fsencode("./examples/jeopardy/results/") + csv_reader = csv.reader(open("./examples/jeopardy/qasheet.csv", 'rt'), delimiter=',') + for row in csv_reader: + global rows + rows.append(row) + for listing in os.listdir(directory): + filename = os.fsdecode(listing) + if filename.endswith(".txt"): + file = open("./examples/jeopardy/results/" + filename, "rt") + global labels + global numEntries + global numbers + labels.append(filename[:-4]) + numEntries += 1 + i = 1 + totalcorrect = 0 + for line in file.readlines(): + if line.strip() != "------": + print(line) + else: + print("Correct answer: " + rows[i][2] + "\n") + i+=1 + print("Did the AI get the question right? 
(y/n)") + if input() == "y": + totalcorrect += 1 + numbers.append(totalcorrect) + + + +if __name__ == '__main__': + calculatecorrect() + pos = list(range(numEntries)) + labels.append("Human") + numbers.append(48.11) + bar_chart(numbers, labels, pos) + print(labels) + print(numbers) diff --git a/examples/jeopardy/jeopardy.sh b/examples/jeopardy/jeopardy.sh new file mode 100644 index 0000000000000..9bdbc755c13a7 --- /dev/null +++ b/examples/jeopardy/jeopardy.sh @@ -0,0 +1,30 @@ +#!/bin/bash +set -e + +MODEL=./models/ggml-vicuna-13b-1.1-q4_0.bin +MODEL_NAME=Vicuna + +# exec options +prefix="Human: " # Ex. Vicuna uses "Human: " +opts="--temp 0 -n 80" # additional flags +nl=' +' +introduction="You will be playing a game of Jeopardy. Simply answer the question in the correct format (Ex. What is Paris, or Who is George Washington)." + +# file options +question_file=./examples/jeopardy/questions.txt +touch ./examples/jeopardy/results/$MODEL_NAME.txt +output_file=./examples/jeopardy/results/$MODEL_NAME.txt + +counter=1 + +echo 'Running' +while IFS= read -r question +do + exe_cmd="./main -p "\"$prefix$introduction$nl$prefix$question\"" "$opts" -m ""\"$MODEL\""" >> ""\"$output_file\"" + echo $counter + echo "Current Question: $question" + eval "$exe_cmd" + echo -e "\n------" >> $output_file + counter=$((counter+1)) +done < "$question_file" diff --git a/examples/jeopardy/qasheet.csv b/examples/jeopardy/qasheet.csv new file mode 100644 index 0000000000000..35b08418956ab --- /dev/null +++ b/examples/jeopardy/qasheet.csv @@ -0,0 +1,103 @@ +Index,Original Category,Original Correct Question,Model Prompt +1,The Oscars,Who is John Williams?,Which actor Born in 1932 was the son of a percussionist in the CBS radio orchestra has been nominated for 53 Oscars? +2,English Literature,What is Paradise Lost?,"What work in English Literature says: 'The mind is its own place, & in itself can make a heaven of hell, a hell of heaven. What matter where, if I be still the same'?" 
+3,Writers’ Lesser-Known Works,Who is Niccolò Machiavelli?,"Known for more philosophical works, he wrote the play 'La Mandragola', in which Florentines are rewarded for immoral actions?" +4,Exploration,What is Easter Island (Rapa Nui)?,"James Cook's account of a 1774 visit where records an object 'near 27 feet long, and upwards of 8 feet over the breast or shoulders'?" +5,The Bill of Rights,What is the Eighth Amendment?,England's 'Bloody Assizes' & a 1685 life sentence for perjury were 2 main origins of which amendment to the U.S. Constitution? +6,Nobel Peace Prize Winners,Who are Nelson Mandela & Desmond Tutu?,"Which nobel peace price winners each lived at times on Vilakazi St. in Soweto , so it claims to be the world's only street home to 2 Nobel Peace Prize winners?" +7,Famous Names,Who is Walt Disney?,"In 1966, the year of who's death did he share plans for an experimental prototype community in Florida?" +8,Geography,What is Colombia?,"Of the 13 nations through which the Equator passes, what is the only one whose coastline borders the Caribbean Sea?" +9,Fashion History,What are rhinestones?,"Which decorative items in fashion history get their name from their origin in the port city of Strasbourg, on the border of France & Germany?" +10,Movies of the ’80s,What is Driving Miss Daisy?,What 1980's movie is based on an off-Broadway play with just 3 characters and won the Best Picture Oscar & the actors in all 3 roles were nominated? +11,Novelists,Who is John Grisham?,"A 2012 book review for which novelist noted subjects that 'sparked his ire': capital punishment, big tobacco & 'the plight of the unjustly convicted'?" +12,20th Century Eponyms,What is the Maginot Line?,"A 1940 headline about what 20th Century Eponym included 'failure', 'liability when it came to offense' & 'stout hearts no match for tanks'?" +13,City History,What is Stockholm?,"Over 700 years after its traditional 1252 founding date, what port city became associated with a psychological response?" 
+14,Brand Names,What is Jacuzzi?,"The success of what brand has its roots with a hydrotherapy pump its cofounder created for his son, who had arthritis?" +15,American Authors,Who is Washington Irving?,"In a periodical in 1807, what American Author called New York City 'Gotham, Gotham! Most enlightened of cities'?" +16,Symbols,What is “less than”?,What symbol is a rotated V in math and a feeling of some marginalized or underrepresented people in society? +17,Movie Theme Songs,Who is James Bond?,"Monty Norman, the composer of what character's theme, said the staccato riff conveyed sexiness, mystery & ruthlessness?" +18,American Novelists,Who is Joseph Heller?,"What American Novelist served with an airman named Yohannan in World War II & despite what readers might think, he said he enjoyed his service?" +19,Medieval Places,"What is Canterbury, England? (Canterbury Cathedral)","In what Medieval place did one of the participants in an 1170 event say, 'Let us away, knights; he will rise no more'?" +20,Countries of Africa,What is Morocco?,"At one time a province of the Roman Empire, what African country kingdom is known to Arabic scholars as Al-Maghrib Al-Aqsa, 'the far west'?" +21,Statehood,What is Wyoming?,Congress relented in 1890 after what prospective state said it would wait 100 years rather than come in without the women? +22,1980s Movies,What is Raiders of the Lost Ark?,"A writer & producer of what movie said he wanted it to be like a Western or James Bond film, 'only it takes place in the 30s'?" +23,Art Exhibitions,Who is Rembrandt?,In 1898 what's been called the first blockbuster art show was devoted to which artist & put on for Queen Wilhelmina's coronation? +24,Countries of the World,What is Mongolia?,"Part of the largest contiguous land empire during the 1200s & 1300s, today what is the world's second-largest landlocked country?" +25,Literature,What is “Howl”?,A 2006 book was titled 'The Poem That Changed America:' What 'Fifty Years Later'? 
+26,Invasions,Who is William of Orange?,"Backed by 14,000 troops, who invaded England to restore, in his words, its 'religion, laws, and liberties'?" +27,Landmarks,What is the Eiffel Tower?,"After its completion in the late 19th c., what was landmark was called 'a truly tragic street lamp' & a 'high & skinny pyramid of iron ladders'?" +28,Geographic Name’s the Same,What is Dover?,"The busiest passenger port in the U.K., what shares its name with a capital of one of the original 13 states?" +29,Names in the Bookstore,Who is Peter Mark Roget?,"This man made lists, perhaps to cope with depression; a set of lists he published in 1852 made whose name synonymous with a type of book?" +30,U.S. History,Who is Dr. Samuel Mudd?,"An 1869 presidential pardon was granted to which man, due in part to a plea by the Medical Society of Harford County, Maryland?" +31,American Literature,What is The Things They Carried?,"Letters, pocket knives, C rations & steel helmets are among the tangible items referred to in the title of what American literature modern war classic?" +32,Nonfiction,What is The Communist Manifesto?,"What nonfiction book has the line, 'The discovery of America…opened up fresh ground for the rising bourgeoisie'?" +33,Laws in U.S. History,What is the Civil Rights Act?,"A radical Republican championed what 1875 act but the Supreme Court struck it down in 1883; a new version was passed 81 years later?" +34,Names of Myth,Who is Helen of Troy?,"Whose brothers, Castor & Pollux, saved her after Theseus stole her away as a kid; a larger force would seek her later in life?" +35,African Countries,What is Sudan?,"Once Africa's largest country in area, what African Country dropped to third in 2011 when a portion of it declared independence?" +36,The Ancient World,What is Alexandria?,"The ancient writer Galen said books on ships arriving to what city's port were seized, originals kept & copies returned?" 
+37,Famous Names,Who is Andy Warhol?,"For a special 1970s cookbook, who provided one simple recipe–a can of Campbell's tomato soup & 2 cans of milk?" +38,People & Places,What is Guam?,"Thought to descend from people of Southeast Asia, the Chamorro make up what U.S. territory’s largest ethnic group?" +39,Current World Leaders,What is the Philippines?,"In office from 2022, the president of what country has taken so many foreign trips a play on his name is 'Ferdinand Magellan Jr.'?" +40,Writers & The South,Who is Tennessee Williams?,In 1939 which writer lived on Toulouse Street in the French Quarter & chose the professional name that bonded him to the South? +41,National Parks,What is Yellowstone?,"What National Park is named for a river indigenous people called Mi tse a-da-zi, translated by French-speaking trappers as 'Pierre Jaune'?" +42,Sports,Who are the Harlem Globetrotters?,"In 2010 who introduced the 4-point shot, 35 feet from the basket?" +43,The U.S. Military,What is “Top Gun”?,Losses over Asia in the 1960s led to the establishment of the program known as what at a San Diego naval base in 1969? +44,Art & Science,What is Halley’s Comet?,"A craft that visited what was named for Giotto, based on the story that 680 years earlier, the painter depicted it as the Star of Bethlehem?" +45,Words From World War I,What is “tank”?,"In World War I, 'Cistern' & 'reservoir' were suggested names for what secret invention, but the British preferred this less clumsy monosyllable?" +46,European History,What is Holy Roman Emperor?,"Until 1806, some German nobles included among their honors the title of 'Elector' for their role in selecting this personage?" +47,Theater History,Who is Peter Pan?,"In 1904, wearing a harness, actress Nina Boucicault became the first to play what character onstage?" +48,European Cities,What is Aachen?,"Alphabetically the first German city in encyclopedias, what was also the first one taken by the Allies in World War II?" 
+49,Word Origins,What is mantra?,This Sanskrit word referring to a spoken word or phrase comes from a word for 'to think'? +50,Inventions,What is barbed wire?,1917's 'Elements of Trench Warfare' said what Old West invention was 'difficult to destroy' & 'difficult to get through'? +51,World War II,What is Schindler’s list?,"Mimi Reinhard, who never learned to type using more than 2 fingers, produced what in World War II with 1,100 names, including hers?" +52,Mythology,What is the Golden Fleece?,"Poseidon carried off the maiden Theophane & turned her into a ewe; their offspring was the source of what mythical object?" +53,Literature,What is Pride and Prejudice?,"Published in 2011, P.D. James' final novel, 'Death Comes to Pemberley', was a sequel to what novel from 200 years earlier?" +54,U.S. State Names,What are Oregon & Nevada?,"5 U.S. states have 6-letter names; only which 2 west of the Mississippi River border each other?" +55,Word Origins,What is passion?,"Originally relating to a story of suffering, what word now more commonly refers to strong emotion of any kind?" +56,World Cinema,What is La Vie en Rose?,"The 2007 biopic called 'La Môme' in France, meaning 'The Kid', was released in the U.S. under what other French title?" +57,History,What is Santa Maria?,"Returning home in 1493, Columbus stopped in the Azores at an island with what name, also something he'd lost off the Haiti coast?" +58,Landmarks,What is a kremlin?,Pskov & Nizhny Novgorod are 2 of the cities that have a fortress called what? +59,Foreign-Born Authors,Who is Vladimir Nabokov?,In the 1950s the New York Times said what author 'is writing about all lust' & his lecherous narrator 'is all of us'? +60,Astronomy & Geography,What is Capricorn?,"At the winter solstice, the sun is in Sagittarius; it once appeared in what constellation, giving a geographic feature its name?" 
+62,British Landmarks,What is the Tower of London?,"Like Sir Thomas More, 3 16th century English queens are buried at what British location?" +63,Early American History,What are witches?,"In 1692 Increase Mather wrote, 'It were better that ten suspected' of these who 'escape, than that one innocent person … be condemned'?" +64,Geography Mnemonics,What are Arkansas and Louisiana?,"The Geography Mnemonic Mimal, sometimes said to be the silhouette of a chef or elf, stands for Minnesota, Iowa, Missouri, and what other 2 states?" +65,Business Milestones,What is the Ford Model T?,"What was first sold in 1908, at a price equivalent to about $27,000 today?" +66,In The Bookstore,Who is Tom Clancy?,The name of what author dead since 2013 now appears on books written by a former U.S. marshal & a former Apache helicopter pilot? +67,Historic Art,What is the Bayeux Tapestry?,The artwork once known in France as 'la tapisserie de la Reine Mathilde' is better known as what? +68,Pop Stars,Who is Madonna?,In 2022 which pop star became the first woman to have a Billboard Top 10 album in 5 decades starting with the 1980s? +69,Classic Tale Characters,Who is Scheherazade?,"In one 19th century translation, what female classic tale character 'perceived the dawn of day and ceased' speaking nearly 1,000 times?" +70,USA,What is Jack Daniel’s?,"Ironically, though what company founded in the 1860s is Moore County, Tennessee's largest employer, Moore is a dry county?" +71,Historic People,Who was William Bligh?,"After a 1789 event, who wrote, 'My first determination was to seek a supply of…water at Tofoa, & afterwards to sail for Tongataboo'?" +72,The Movies,What is The Godfather?,Laurence Olivier & Ernest Borgnine were considered for the lead role & Sergio Leone to direct for what film that turned 50 in 2022? +73,Continental Geography,What is Colombia?,"Until a 1903 secession, what country's contiguous territory spanned 2 continents?" 
+74,Foreign-Born Authors,Who is Isabel Allende?,"Early in her career which foreign-born author translated romance novels into Spanish, often changing the dialogue to make the heroines smarter?" +75,Historic Crimes,What is the Mona Lisa?,"Saying it was stolen by Napoleon, self-styled Italian patriot Vincenzo Peruggia took what in 1911?" +76,U.S. Bodies of Water,What is Lake Mead?,"Continuing a downward trend, in July 2022 what US body of water was at 27% capacity, its lowest level since 1937 when it was first being filled?" +77,Gods & Goddesses,Who is Aurora (or Eos)?,"Each morning which goddess began her ride in her chariot across the sky ahead of her brother Sol, or Helios?" +78,America At War,What is the Battle of New Orleans?,"Until the Civil War, the Jan. 8 date of what American battle of dubious military importance but big morale value was a national holiday?" +79,Children’s Books,What is The Velveteen Rabbit?,"Which children's book title character is told 'By the time you are real, most of your hair has been loved off your eyes drop out & you get shabby'?" +80,TV Finales,What is Grace and Frankie?,"In a TV reunion over 40 years in the making, Dolly Parton appeared as an angel named Agnes in the final episode of what comedy in 2022?" +81,American Poems,Who is Evangeline?,"In an 1847 American poem what character sees her town of Grand-Pré burned, but finally reunites with her beau for a kiss before his death?" +82,Famous Names,Who is Banksy?,"In 2001 who published a book called 'Banging Your Head Against a Brick Wall'; in 2002, 'Existencilism'?" +83,Children’s Lit,What is Charlotte’s Web?,The title object of what childrens book 'never looked more beautiful each strand held dozens of bright drops of early morning dew'? +84,Classic Songs,What is “Here Comes Santa Claus”?,The shouts of excited children at a 1946 holiday parade are said to have inspired what perennial classic song favorite? 
+85,Brand Names,What are Milk Duds?,"Unable to make what candies perfectly round, the confectioner embraced this flawed name for the product?" +86,Countries of the World,What is Italy?,"What country is home to 58 UNESCO World Heritage Sites, more than any other country; the sites include a volcano & a lagoon?" +87,Action Movies,What is Die Hard?,"What action movie's last line is 'If this is their idea of Christmas, I gotta be here for New Years'?" +88,Presidential Facts,Who is Woodrow Wilson?,Only 3 presidents have married while in office— John Tyler was the first & which one was the last? +89,19th Century Americans,Who is Frederick Douglass?,"Demonstrating the dignity & humanity of Black Americans, who sat for 160 known photographs, the most of any American in the 19th century?" +90,Latin Phrases,What is “quid pro quo”?,"Originally, which Latin 3-word phrase referred to when a doctor or apothecary substituted one medicine for another?" +91,1970s Movies,What is Monty Python and the Holy Grail?,The 1975 premiere of what movie comedy advertised free coconuts for the first thousand in the audience? +92,Name’s The Same,What is Manhattan?,"A cocktail, an island & a WWII venture originally called 'Development of Substitute Materials' all bear what name?" +93,U.S. Presidents,Who is Calvin Coolidge?,"Which US President was sworn in twice as President within 2 years, first by his father & then later by a former U.S. President?" +94,Plays,What is The Tempest?,A 1609 story in which an exiled king of Bulgaria creates a sea palace with his magic may have inspired the plot of what play? +95,Landmarks,What is the Berlin Wall?,"In 2009, during a 20th anniversary celebration, what landmark was called 'an edifice of fear. On Nov. 9, it became a place of joy'?" +96,World Capitals,"What is Vienna, Austria?","Among what world capital's nicknames are the 'City of Classical Music' &, possibly in honor of a famous resident from 1860 to 1938, the 'City of Dreams'?" 
+97,Language & Its Meanings,What is a night owl?,"Now meaning someone with nocturnal habits, what catches a sleeping dove in Shakespeare's 'Lucrece'?" +98,Flags of Our Hemisphere,What is Brazil?,"The stars on what country's flag represent states, 26 of them; unlike the USA's, its 'federal district' gets its own 27th star?" +99,Names in U.S. History,Who is Oliver Brown?,What father was the only man among the 13 plaintiffs in a US class-action case filed in 1951? +100,Children’s Authors,"Who is Sarah? (from Sarah, Plain and Tall)","Reversing the story of what heroine she created, childrens author Patricia Maclachlan was born on the prairie but spent much of her life in New England?" +,,, +TOTALS,,, diff --git a/examples/jeopardy/questions.txt b/examples/jeopardy/questions.txt new file mode 100644 index 0000000000000..eea78a057126c --- /dev/null +++ b/examples/jeopardy/questions.txt @@ -0,0 +1,100 @@ +Which man born in 1932 was the son of a percussionist in the CBS radio orchestra has been nominated for 53 Oscars? +What work in English Literature says: 'The mind is its own place, & in itself can make a heaven of hell, a hell of heaven. What matter where, if I be still the same'? +Known for more philosophical works, he wrote the play 'La Mandragola', in which Florentines are rewarded for immoral actions? +James Cook's account of a 1774 visit where records an object 'near 27 feet long, and upwards of 8 feet over the breast or shoulders'? +England's 'Bloody Assizes' & a 1685 life sentence for perjury were 2 main origins of which amendment to the U.S. Constitution? +Which nobel peace price winners each lived at times on Vilakazi St. in Soweto , so it claims to be the world's only street home to 2 Nobel Peace Prize winners? +In 1966, the year of who's death did he share plans for an experimental prototype community in Florida? +Of the 13 nations through which the Equator passes, what is the only one whose coastline borders the Caribbean Sea? 
+Which decorative items in fashion history get their name from their origin in the port city of Strasbourg, on the border of France & Germany? +What 1980's movie is based on an off-Broadway play with just 3 characters and won the Best Picture Oscar & the actors in all 3 roles were nominated? +A 2012 book review for which novelist noted subjects that 'sparked his ire': capital punishment, big tobacco & 'the plight of the unjustly convicted'? +A 1940 headline about what 20th Century Eponym included 'failure', 'liability when it came to offense' & 'stout hearts no match for tanks'? +Over 700 years after its traditional 1252 founding date, what port city became associated with a psychological response? +The success of what brand has its roots with a hydrotherapy pump its cofounder created for his son, who had arthritis? +In a periodical in 1807, what American Author called New York City 'Gotham, Gotham! Most enlightened of cities'? +What symbol is a rotated V in math and a feeling of some marginalized or underrepresented people in society? +Monty Norman, the composer of what character's theme, said the staccato riff conveyed sexiness, mystery & ruthlessness? +What American Novelist served with an airman named Yohannan in World War II & despite what readers might think, he said he enjoyed his service? +In what Medieval place did one of the participants in an 1170 event say, 'Let us away, knights; he will rise no more'? +At one time a province of the Roman Empire, what African country kingdom is known to Arabic scholars as Al-Maghrib Al-Aqsa, 'the far west'? +Congress relented in 1890 after what prospective state said it would wait 100 years rather than come in without the women? +A writer & producer of what movie said he wanted it to be like a Western or James Bond film, 'only it takes place in the 30s'? +In 1898 what's been called the first blockbuster art show was devoted to which artist & put on for Queen Wilhelmina's coronation? 
+Part of the largest contiguous land empire during the 1200s & 1300s, today what is the world's second-largest landlocked country? +A 2006 book was titled 'The Poem That Changed America:' What 'Fifty Years Later'? +Backed by 14,000 troops, who invaded England to restore, in his words, its 'religion, laws, and liberties'? +After its completion in the late 19th c., what was landmark was called 'a truly tragic street lamp' & a 'high & skinny pyramid of iron ladders'? +The busiest passenger port in the U.K., what shares its name with a capital of one of the original 13 states? +This man made lists, perhaps to cope with depression; a set of lists he published in 1852 made whose name synonymous with a type of book? +An 1869 presidential pardon was granted to which man, due in part to a plea by the Medical Society of Harford County, Maryland? +Letters, pocket knives, C rations & steel helmets are among the tangible items referred to in the title of what American literature modern war classic? +What nonfiction book has the line, 'The discovery of America…opened up fresh ground for the rising bourgeoisie'? +A radical Republican championed what 1875 act but the Supreme Court struck it down in 1883; a new version was passed 81 years later? +Whose brothers, Castor & Pollux, saved her after Theseus stole her away as a kid; a larger force would seek her later in life? +Once Africa's largest country in area, what African Country dropped to third in 2011 when a portion of it declared independence? +The ancient writer Galen said books on ships arriving to what city's port were seized, originals kept & copies returned? +For a special 1970s cookbook, who provided one simple recipe–a can of Campbell's tomato soup & 2 cans of milk? +Thought to descend from people of Southeast Asia, the Chamorro make up what U.S. territory’s largest ethnic group? +In office from 2022, the president of what country has taken so many foreign trips a play on his name is 'Ferdinand Magellan Jr.'? 
+In 1939 which writer lived on Toulouse Street in the French Quarter & chose the professional name that bonded him to the South? +What National Park is named for a river indigenous people called Mi tse a-da-zi, translated by French-speaking trappers as 'Pierre Jaune'? +In 2010 who introduced the 4-point shot, 35 feet from the basket? +Losses over Asia in the 1960s led to the establishment of the program known as what at a San Diego naval base in 1969? +A craft that visited what was named for Giotto, based on the story that 680 years earlier, the painter depicted it as the Star of Bethlehem? +In World War I, 'Cistern' & 'reservoir' were suggested names for what secret invention, but the British preferred this less clumsy monosyllable? +Until 1806, some German nobles included among their honors the title of 'Elector' for their role in selecting this personage? +In 1904, wearing a harness, actress Nina Boucicault became the first to play what character onstage? +Alphabetically the first German city in encyclopedias, what was also the first one taken by the Allies in World War II? +This Sanskrit word referring to a spoken word or phrase comes from a word for 'to think'? +1917's 'Elements of Trench Warfare' said what Old West invention was 'difficult to destroy' & 'difficult to get through'? +Mimi Reinhard, who never learned to type using more than 2 fingers, produced what in World War II with 1,100 names, including hers? +Poseidon carried off the maiden Theophane & turned her into a ewe; their offspring was the source of what mythical object? +Published in 2011, P.D. James' final novel, 'Death Comes to Pemberley', was a sequel to what novel from 200 years earlier? +5 U.S. states have 6-letter names; only which 2 west of the Mississippi River border each other? +Originally relating to a story of suffering, what word now more commonly refers to strong emotion of any kind? +The 2007 biopic called 'La Môme' in France, meaning 'The Kid', was released in the U.S. 
under what other French title? +Returning home in 1493, Columbus stopped in the Azores at an island with what name, also something he'd lost off the Haiti coast? +Pskov & Nizhny Novgorod are 2 of the cities that have a fortress called what? +In the 1950s the New York Times said what author 'is writing about all lust' & his lecherous narrator 'is all of us'? +At the winter solstice, the sun is in Sagittarius; it once appeared in what constellation, giving a geographic feature its name? +Mike Post combined the sound of a slamming jail door, an anvil & 100 men stomping on a floor for what television series that debuted in 1990? +Like Sir Thomas More, 3 16th century English queens are buried at what British location? +In 1692 Increase Mather wrote, 'It were better that ten suspected' of these who 'escape, than that one innocent person be condemned'? +The Geography Mnemonic Mimal, sometimes said to be the silhouette of a chef or elf, stands for Minnesota, Iowa, Missouri, and what other 2 states? +What was first sold in 1908, at a price equivalent to about $27,000 today? +The name of what author dead since 2013 now appears on books written by a former U.S. marshal & a former Apache helicopter pilot? +The artwork once known in France as 'la tapisserie de la Reine Mathilde' is better known as what? +In 2022 which pop star became the first woman to have a Billboard Top 10 album in 5 decades starting with the 1980s? +In one 19th century translation, what female classic tale character 'perceived the dawn of day and ceased' speaking nearly 1,000 times? +Ironically, though what company founded in the 1860s is Moore County, Tennessee's largest employer, Moore is a dry county? +After a 1789 event, who wrote, 'My first determination was to seek a supply of…water at Tofoa, & afterwards to sail for Tongataboo'? +Laurence Olivier & Ernest Borgnine were considered for the lead role & Sergio Leone to direct for what film that turned 50 in 2022? 
+Until a 1903 secession, what country's contiguous territory spanned 2 continents? +Early in her career which foreign-born author translated romance novels into Spanish, often changing the dialogue to make the heroines smarter? +Saying it was stolen by Napoleon, self-styled Italian patriot Vincenzo Peruggia took what in 1911? +Continuing a downward trend, in July 2022 what US body of water was at 27% capacity, its lowest level since 1937 when it was first being filled? +Each morning which goddess began her ride in her chariot across the sky ahead of her brother Sol, or Helios? +Until the Civil War, the Jan. 8 date of what American battle of dubious military importance but big morale value was a national holiday? +Which children's book title character is told 'By the time you are real, most of your hair has been loved off, your eyes drop out & you get shabby'? +In a TV reunion over 40 years in the making, Dolly Parton appeared as an angel named Agnes in the final episode of what comedy in 2022? +In an 1847 American poem what character sees her town of Grand-Pré burned, but finally reunites with her beau for a kiss before his death? +In 2001 who published a book called 'Banging Your Head Against a Brick Wall'; in 2002, 'Existencilism'? +The title object of what children's book 'never looked more beautiful each strand held dozens of bright drops of early morning dew'? +The shouts of excited children at a 1946 holiday parade are said to have inspired what perennial classic song favorite? +Unable to make what candies perfectly round, the confectioner embraced this flawed name for the product? +What country is home to 58 UNESCO World Heritage Sites, more than any other country; the sites include a volcano & a lagoon? +What action movie's last line is 'If this is their idea of Christmas, I gotta be here for New Year's'? +Only 3 presidents have married while in office—John Tyler was the first & which one was the last? 
+Demonstrating the dignity & humanity of Black Americans, who sat for 160 known photographs, the most of any American in the 19th century? +Originally, which Latin 3-word phrase referred to when a doctor or apothecary substituted one medicine for another? +The 1975 premiere of what movie comedy advertised free coconuts for the first thousand in the audience? +A cocktail, an island & a WWII venture originally called 'Development of Substitute Materials' all bear what name? +Which US President was sworn in twice as President within 2 years, first by his father & then later by a former U.S. President? +A 1609 story in which an exiled king of Bulgaria creates a sea palace with his magic may have inspired the plot of what play? +In 2009, during a 20th anniversary celebration, what landmark was called 'an edifice of fear. On Nov. 9, it became a place of joy'? +Among what world capital's nicknames are the 'City of Classical Music' &, possibly in honor of a famous resident from 1860 to 1938, the 'City of Dreams'? +Now meaning someone with nocturnal habits, what catches a sleeping dove in Shakespeare's 'Lucrece'? +The stars on what country's flag represent states, 26 of them; unlike the USA's, its 'federal district' gets its own 27th star? +What father was the only man among the 13 plaintiffs in a US class-action case filed in 1951? +Reversing the story of what heroine she created, children's author Patricia MacLachlan was born on the prairie but spent much of her life in New England? 
From 55390bcaf2579a5435564d7267ae3ed367837fd6 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Fri, 28 Apr 2023 20:37:43 +0300 Subject: [PATCH 7/8] ggml : sync ggml (ggml_alibi) --- ggml.c | 203 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- ggml.h | 9 +++ 2 files changed, 210 insertions(+), 2 deletions(-) diff --git a/ggml.c b/ggml.c index 44293dac92668..53796bd97d3ab 100644 --- a/ggml.c +++ b/ggml.c @@ -4034,7 +4034,7 @@ static const char * GGML_OP_LABEL[GGML_OP_COUNT] = { "MAP_BINARY", }; -static_assert(GGML_OP_COUNT == 38, "GGML_OP_COUNT != 38"); +static_assert(GGML_OP_COUNT == 39, "GGML_OP_COUNT != 39"); static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = { "none", @@ -4082,7 +4082,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = { "f(x,y)", }; -static_assert(GGML_OP_COUNT == 38, "GGML_OP_COUNT != 38"); +static_assert(GGML_OP_COUNT == 39, "GGML_OP_COUNT != 39"); static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN"); static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN"); @@ -6080,6 +6080,37 @@ struct ggml_tensor * ggml_rope( return result; } +// ggml_alibi + +struct ggml_tensor * ggml_alibi( + struct ggml_context * ctx, + struct ggml_tensor * a, + int n_past, + int n_head) { + GGML_ASSERT(n_past >= 0); + bool is_node = false; + + if (a->grad) { + GGML_ASSERT(false); // TODO: implement backward + is_node = true; + } + + // TODO: when implement backward, fix this: + //struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); + struct ggml_tensor * result = ggml_view_tensor(ctx, a); + + struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2); + ((int32_t *) b->data)[0] = n_past; + ((int32_t *) b->data)[1] = n_head; + + result->op = GGML_OP_ALIBI; + result->grad = is_node ? 
ggml_dup_tensor(ctx, result) : NULL; + result->src0 = a; + result->src1 = b; + + return result; +} + // ggml_conv_1d_1s struct ggml_tensor * ggml_conv_1d_1s( @@ -9300,6 +9331,162 @@ static void ggml_compute_forward_soft_max( } } +// ggml_compute_forward_alibi + +static void ggml_compute_forward_alibi_f32( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + assert(params->ith == 0); + assert(src1->type == GGML_TYPE_I32); + assert(ggml_nelements(src1) == 2); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int n_past = ((int32_t *) src1->data)[0]; + const int n_head = ((int32_t *) src1->data)[1]; + + const int ne0 = src0->ne[0]; // all_seq_len = n_past + ne1 + const int ne1 = src0->ne[1]; // seq_len_without_past + //const int ne2 = src0->ne[2]; // n_head -> this is k + //const int ne3 = src0->ne[3]; // 1 -> bsz + + const int n = ggml_nrows(src0); + const int ne2_ne3 = n/ne1; // ne2*ne3 + + const int nb0 = src0->nb[0]; + const int nb1 = src0->nb[1]; + const int nb2 = src0->nb[2]; + //const int nb3 = src0->nb[3]; + + assert(nb0 == sizeof(float)); + assert(ne1+n_past == ne0); + + // add alibi to src0 (KQ_scaled) + const int n_heads_log2_floor = 1 << (int) floor(log2(n_head)); + + const float m0 = powf(2.0f, -8.0f / n_heads_log2_floor); + const float m1 = powf(2.0f, -4.0f / n_heads_log2_floor); + + for (int i = 0; i < ne0; i++) { + for (int j = 0; j < ne1; j++) { + for (int k = 0; k < ne2_ne3; k++) { + float * const src = (float *)((char *) src0->data + i*nb0 + j*nb1 + k*nb2); + float * pdst = (float *)((char *) dst->data + i*nb0 + j*nb1 + k*nb2); + + // TODO: k*nb2 or k*nb3 + + float m_k; + + if (k < n_heads_log2_floor) { + m_k = powf(m0, k + 1); + } else { + m_k = powf(m1, 2 * (k - n_heads_log2_floor) + 1); + } + + pdst[0] = (j+1) * m_k + src[0]; + } + } + } +} + + +static void ggml_compute_forward_alibi_f16( + const 
struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + assert(params->ith == 0); + assert(src1->type == GGML_TYPE_I32); + assert(ggml_nelements(src1) == 2); + + if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { + return; + } + + const int n_past = ((int32_t *) src1->data)[0]; + const int n_head = ((int32_t *) src1->data)[1]; + + const int ne0 = src0->ne[0]; // all_seq_len = n_past + ne1 + const int ne1 = src0->ne[1]; // seq_len_without_past + //const int ne2 = src0->ne[2]; // n_head -> this is k + //const int ne3 = src0->ne[3]; // 1 -> bsz + + const int n = ggml_nrows(src0); + const int ne2_ne3 = n/ne1; // ne2*ne3 + + const int nb0 = src0->nb[0]; + const int nb1 = src0->nb[1]; + const int nb2 = src0->nb[2]; + //const int nb3 = src0->nb[3]; + + assert(nb0 == sizeof(ggml_fp16_t)); + assert(ne1+n_past == ne0); + + // add alibi to src0 (KQ_scaled) + const int n_heads_log2_floor = 1 << (int) floor(log2(n_head)); + + const float m0 = powf(2.0f, -8.0f / n_heads_log2_floor); + const float m1 = powf(2.0f, -4.0f / n_heads_log2_floor); + + for (int i = 0; i < ne0; i++) { + for (int j = 0; j < ne1; j++) { + for (int k = 0; k < ne2_ne3; k++) { + ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i*nb0 + j*nb1 + k*nb2); + float * pdst = (float *)((char *) dst->data + i*nb0 + j*nb1 + k*nb2); + + // TODO: k*nb2 or k*nb3 + + float m_k; + + if (k < n_heads_log2_floor) { + m_k = powf(m0, k + 1); + } else { + m_k = powf(m1, 2 * (k - n_heads_log2_floor) + 1); + } + + // we return F32 + pdst[0] = (j+1) * m_k + GGML_FP16_TO_FP32(src[0]); + } + } + } +} + +static void ggml_compute_forward_alibi( + const struct ggml_compute_params * params, + const struct ggml_tensor * src0, + const struct ggml_tensor * src1, + struct ggml_tensor * dst) { + switch (src0->type) { + case GGML_TYPE_F16: + { + ggml_compute_forward_alibi_f16(params, src0, src1, dst); + } break; + case 
GGML_TYPE_F32: + { + ggml_compute_forward_alibi_f32(params, src0, src1, dst); + } break; + case GGML_TYPE_Q4_0: + case GGML_TYPE_Q4_1: + case GGML_TYPE_Q4_2: + case GGML_TYPE_Q4_3: + case GGML_TYPE_Q5_0: + case GGML_TYPE_Q5_1: + case GGML_TYPE_Q8_0: + case GGML_TYPE_Q8_1: + case GGML_TYPE_I8: + case GGML_TYPE_I16: + case GGML_TYPE_I32: + case GGML_TYPE_COUNT: + { + GGML_ASSERT(false); + } break; + } +} + // ggml_compute_forward_rope static void ggml_compute_forward_rope_f32( @@ -10938,6 +11125,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm { ggml_compute_forward_rope(params, tensor->src0, tensor->src1, tensor); } break; + case GGML_OP_ALIBI: + { + ggml_compute_forward_alibi(params, tensor->src0, tensor->src1, tensor); + } break; case GGML_OP_CONV_1D_1S: { ggml_compute_forward_conv_1d_1s(params, tensor->src0, tensor->src1, tensor); @@ -11140,6 +11331,10 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor { GGML_ASSERT(false); // TODO: not implemented } break; + case GGML_OP_ALIBI: + { + GGML_ASSERT(false); // TODO: not implemented + } break; case GGML_OP_SILU: { GGML_ASSERT(false); // TODO: not implemented @@ -11673,6 +11868,10 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) { node->n_tasks = n_threads; } break; + case GGML_OP_ALIBI: + { + node->n_tasks = 1; //TODO + } break; case GGML_OP_CONV_1D_1S: case GGML_OP_CONV_1D_2S: { diff --git a/ggml.h b/ggml.h index 1bbe2db93f5d1..540901f15a7f1 100644 --- a/ggml.h +++ b/ggml.h @@ -269,6 +269,7 @@ extern "C" { GGML_OP_DIAG_MASK_INF, GGML_OP_SOFT_MAX, GGML_OP_ROPE, + GGML_OP_ALIBI, GGML_OP_CONV_1D_1S, GGML_OP_CONV_1D_2S, @@ -662,6 +663,14 @@ extern "C" { int n_dims, int mode); + // alibi position embedding + // in-place, returns view(a) + struct ggml_tensor * ggml_alibi( + struct ggml_context * ctx, + struct ggml_tensor * a, + int n_past, + int n_head); + // padding = 1 // TODO: we don't support extra parameters for 
now // that's why we are hard-coding the stride, padding, and dilation From 7f15c5c477d9933689a9d1c40794483e350c2f19 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Fri, 28 Apr 2023 21:32:52 +0300 Subject: [PATCH 8/8] readme : update hot topics --- README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/README.md b/README.md index 2a20746c63b18..731f491ca22cd 100644 --- a/README.md +++ b/README.md @@ -9,10 +9,8 @@ Inference of [LLaMA](https://arxiv.org/abs/2302.13971) model in pure C/C++ **Hot topics:** +- [Roadmap May 2023](https://github.com/ggerganov/llama.cpp/discussions/1220) - [New quantization methods](https://github.com/ggerganov/llama.cpp#quantization) -- [Added LoRA support](https://github.com/ggerganov/llama.cpp/pull/820) -- [Add GPU support to ggml](https://github.com/ggerganov/llama.cpp/discussions/915) -- [Roadmap Apr 2023](https://github.com/ggerganov/llama.cpp/discussions/784) ## Description