Make CLIP GPU acceleration work on UNIX / Windows
In order to use ggml-backend safely with Cosmopolitan, this change
converts every function it calls through a function pointer to the Microsoft ABI.
jart committed Jan 6, 2024
1 parent 2b1fef9 commit 20d5f46
Showing 7 changed files with 111 additions and 108 deletions.
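
Context for the change, briefly: a Cosmopolitan (cosmocc) binary is a single fat executable whose x86-64 code runs on both UNIX (System V calling convention) and Windows (Microsoft x64 calling convention), and the GPU module it loads at runtime may be built by a native Windows toolchain. Any function pointer that crosses that module boundary therefore has to pin one calling convention on both sides, which is what the GGML_BACKEND_ABI annotation in the diffs below does. The following is a minimal stand-alone sketch of the pattern for x86-64, not code from this commit; the names draw_cb, draw, and invoke are made up for illustration.

    // Sketch: pinning a callback's calling convention (hypothetical names).
    // On non-Windows x86-64 targets, GCC/Clang accept __attribute__((__ms_abi__)),
    // which makes the function use the Microsoft x64 convention even though the
    // rest of the program uses System V. On Windows the convention is already
    // ms_abi, so the macro expands to nothing.
    #include <stdio.h>

    #if defined(_WIN32)
    #define GGML_BACKEND_ABI
    #else
    #define GGML_BACKEND_ABI __attribute__((__ms_abi__))
    #endif

    // Both the pointer type and the function definition carry the attribute,
    // so the assignment below type-checks and every indirect call through the
    // pointer uses the Microsoft register/stack convention on every platform.
    typedef int (*GGML_BACKEND_ABI draw_cb)(int width, int height);

    GGML_BACKEND_ABI static int draw(int width, int height) {
        return width * height;
    }

    static int invoke(draw_cb cb) {
        return cb(640, 480); // caller and callee now agree on the calling sequence
    }

    int main(void) {
        printf("%d\n", invoke(draw));
        return 0;
    }
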
6 changes: 3 additions & 3 deletions build/config.mk
@@ -2,7 +2,7 @@
 #── vi: set noet ft=make ts=8 sw=8 fenc=utf-8 :vi ────────────────────┘
 
 PREFIX = /usr/local
-COSMOCC = cosmocc/3.2.1
+COSMOCC = cosmocc/3.2.2
 TOOLCHAIN = $(COSMOCC)/bin/cosmo
 
 AR = $(TOOLCHAIN)ar
@@ -50,5 +50,5 @@ clean:; rm -rf o
 .PHONY: distclean
 distclean:; rm -rf o cosmocc
 
-cosmocc/3.2.1:
-	build/download-cosmocc.sh $@ 3.2.1 c392eceb65736ab2b2700e3e0c3b05abbddd45ac9a7456c3e461f70d0ed8d01b
+cosmocc/3.2.2:
+	build/download-cosmocc.sh $@ 3.2.2 9eb32720050b8c186a83be66b471059b3f5b76fc0d6a58bbed6a1cf8359b7b99
55 changes: 27 additions & 28 deletions llama.cpp/ggml-backend-impl.h
@@ -14,15 +14,14 @@ extern "C" {
 
 // buffer type
 typedef void * ggml_backend_buffer_type_context_t;
 
 struct ggml_backend_buffer_type_i {
-    ggml_backend_buffer_t (*alloc_buffer) (ggml_backend_buffer_type_t buft, size_t size);
-    size_t (*get_alignment) (ggml_backend_buffer_type_t buft); // tensor alignment
-    size_t (*get_alloc_size) (ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor); // data size needed to allocate the tensor, including padding
-    bool (*supports_backend)(ggml_backend_buffer_type_t buft, ggml_backend_t backend); // check if the buffer type is usable by the backend
+    ggml_backend_buffer_t (*GGML_BACKEND_ABI alloc_buffer) (ggml_backend_buffer_type_t buft, size_t size);
+    size_t (*GGML_BACKEND_ABI get_alignment) (ggml_backend_buffer_type_t buft); // tensor alignment
+    size_t (*GGML_BACKEND_ABI get_alloc_size) (ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor); // data size needed to allocate the tensor, including padding
+    bool (*GGML_BACKEND_ABI supports_backend)(ggml_backend_buffer_type_t buft, ggml_backend_t backend); // check if the buffer type is usable by the backend
     // check if tensor data is in host memory
     // should be equivalent to supports_backend(buft, ggml_backend_cpu_init())
-    bool (*is_host) (ggml_backend_buffer_type_t buft);
+    bool (*GGML_BACKEND_ABI is_host) (ggml_backend_buffer_type_t buft);
 };
 
 struct ggml_backend_buffer_type {
@@ -34,16 +33,16 @@ extern "C" {
 typedef void * ggml_backend_buffer_context_t;
 
 struct ggml_backend_buffer_i {
-    void (*free_buffer) (ggml_backend_buffer_t buffer);
-    //void (*reset) (ggml_backend_buffer_t buffer); // reset any internal state due to tensor initialization, such as tensor extras
-    void * (*get_base) (ggml_backend_buffer_t buffer);
-    void (*init_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
-    void (*set_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
-    void (*get_tensor) (ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
+    void (*GGML_BACKEND_ABI free_buffer) (ggml_backend_buffer_t buffer);
+    //void (*GGML_BACKEND_ABI reset) (ggml_backend_buffer_t buffer); // reset any internal state due to tensor initialization, such as tensor extras
+    void * (*GGML_BACKEND_ABI get_base) (ggml_backend_buffer_t buffer);
+    void (*GGML_BACKEND_ABI init_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
+    void (*GGML_BACKEND_ABI set_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
+    void (*GGML_BACKEND_ABI get_tensor) (ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
     // (optional) copy tensor between different buffer-type, allow for single-copy tranfers
-    void (*cpy_tensor_from)(ggml_backend_buffer_t buffer, struct ggml_tensor * src, struct ggml_tensor * dst);
-    void (*cpy_tensor_to) (ggml_backend_buffer_t buffer, struct ggml_tensor * src, struct ggml_tensor * dst);
-    void (*clear) (ggml_backend_buffer_t buffer, uint8_t value);
+    void (*GGML_BACKEND_ABI cpy_tensor_from)(ggml_backend_buffer_t buffer, struct ggml_tensor * src, struct ggml_tensor * dst);
+    void (*GGML_BACKEND_ABI cpy_tensor_to) (ggml_backend_buffer_t buffer, struct ggml_tensor * src, struct ggml_tensor * dst);
+    void (*GGML_BACKEND_ABI clear) (ggml_backend_buffer_t buffer, uint8_t value);
 };
 
 struct ggml_backend_buffer {
@@ -67,33 +66,33 @@ extern "C" {
 typedef void * ggml_backend_context_t;
 
 struct ggml_backend_i {
-    const char * (*get_name)(ggml_backend_t backend);
+    const char * (*GGML_BACKEND_ABI get_name)(ggml_backend_t backend);
 
-    void (*free)(ggml_backend_t backend);
+    void (*GGML_BACKEND_ABI free)(ggml_backend_t backend);
 
     // buffer allocation
-    ggml_backend_buffer_type_t (*get_default_buffer_type)(ggml_backend_t backend);
+    ggml_backend_buffer_type_t (*GGML_BACKEND_ABI get_default_buffer_type)(ggml_backend_t backend);
 
     // (optional) asynchroneous tensor data access
-    void (*set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
-    void (*get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
+    void (*GGML_BACKEND_ABI set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
+    void (*GGML_BACKEND_ABI get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
 
     // (optional) asynchroneous tensor copy
-    void (*cpy_tensor_from_async)(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst);
-    void (*cpy_tensor_to_async) (ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst);
+    void (*GGML_BACKEND_ABI cpy_tensor_from_async)(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst);
+    void (*GGML_BACKEND_ABI cpy_tensor_to_async) (ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst);
 
-    void (*synchronize)(ggml_backend_t backend);
+    void (*GGML_BACKEND_ABI synchronize)(ggml_backend_t backend);
 
     // compute graph with a plan
-    ggml_backend_graph_plan_t (*graph_plan_create) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
-    void (*graph_plan_free) (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
-    void (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
+    ggml_backend_graph_plan_t (*GGML_BACKEND_ABI graph_plan_create) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
+    void (*GGML_BACKEND_ABI graph_plan_free) (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
+    void (*GGML_BACKEND_ABI graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
 
     // compute graph without a plan
-    void (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
+    void (*GGML_BACKEND_ABI graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
 
     // check if the backend supports an operation
-    bool (*supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
+    bool (*GGML_BACKEND_ABI supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
 };
 
 struct ggml_backend {
38 changes: 19 additions & 19 deletions llama.cpp/ggml-backend.c
@@ -393,39 +393,39 @@ ggml_backend_buffer_t ggml_backend_reg_alloc_buffer(size_t i, size_t size) {
 
 // backend CPU
 
-static void * ggml_backend_cpu_buffer_get_base(ggml_backend_buffer_t buffer) {
+GGML_BACKEND_ABI static void * ggml_backend_cpu_buffer_get_base(ggml_backend_buffer_t buffer) {
     return (void *)buffer->context;
 }
 
-static void ggml_backend_cpu_buffer_free_buffer(ggml_backend_buffer_t buffer) {
+GGML_BACKEND_ABI static void ggml_backend_cpu_buffer_free_buffer(ggml_backend_buffer_t buffer) {
     free(buffer->context);
 }
 
-static void ggml_backend_cpu_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
+GGML_BACKEND_ABI static void ggml_backend_cpu_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
     memcpy((char *)tensor->data + offset, data, size);
 
     GGML_UNUSED(buffer);
 }
 
-static void ggml_backend_cpu_buffer_get_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
+GGML_BACKEND_ABI static void ggml_backend_cpu_buffer_get_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
     memcpy(data, (const char *)tensor->data + offset, size);
 
     GGML_UNUSED(buffer);
 }
 
-static void ggml_backend_cpu_buffer_cpy_tensor_from(ggml_backend_buffer_t buffer, struct ggml_tensor * src, struct ggml_tensor * dst) {
+GGML_BACKEND_ABI static void ggml_backend_cpu_buffer_cpy_tensor_from(ggml_backend_buffer_t buffer, struct ggml_tensor * src, struct ggml_tensor * dst) {
     ggml_backend_tensor_get(src, dst->data, 0, ggml_nbytes(src));
 
     GGML_UNUSED(buffer);
 }
 
-static void ggml_backend_cpu_buffer_cpy_tensor_to(ggml_backend_buffer_t buffer, struct ggml_tensor * src, struct ggml_tensor * dst) {
+GGML_BACKEND_ABI static void ggml_backend_cpu_buffer_cpy_tensor_to(ggml_backend_buffer_t buffer, struct ggml_tensor * src, struct ggml_tensor * dst) {
     ggml_backend_tensor_set(dst, src->data, 0, ggml_nbytes(src));
 
     GGML_UNUSED(buffer);
 }
 
-static void ggml_backend_cpu_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
+GGML_BACKEND_ABI static void ggml_backend_cpu_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
     memset(buffer->context, value, buffer->size);
 }
 
@@ -454,7 +454,7 @@ static struct ggml_backend_buffer_i cpu_backend_buffer_i_from_ptr = {
 
 static const size_t TENSOR_ALIGNMENT = 64; // should be enough for AVX 512
 
-static ggml_backend_buffer_t ggml_backend_cpu_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
+GGML_BACKEND_ABI static ggml_backend_buffer_t ggml_backend_cpu_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
     size += TENSOR_ALIGNMENT; // malloc may return an address that is not aligned
     void * data = malloc(size); // TODO: maybe use GGML_ALIGNED_MALLOC?
 
@@ -463,19 +463,19 @@ static ggml_backend_buffer_t ggml_backend_cpu_buffer_type_alloc_buffer(ggml_back
     return ggml_backend_buffer_init(buft, cpu_backend_buffer_i, data, size);
 }
 
-static size_t ggml_backend_cpu_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
+GGML_BACKEND_ABI static size_t ggml_backend_cpu_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
     return TENSOR_ALIGNMENT;
 
     GGML_UNUSED(buft);
 }
 
-static bool ggml_backend_cpu_buffer_type_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend) {
+GGML_BACKEND_ABI static bool ggml_backend_cpu_buffer_type_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend) {
     return ggml_backend_is_cpu(backend);
 
     GGML_UNUSED(buft);
 }
 
-static bool ggml_backend_cpu_buffer_type_is_host(ggml_backend_buffer_type_t buft) {
+GGML_BACKEND_ABI static bool ggml_backend_cpu_buffer_type_is_host(ggml_backend_buffer_type_t buft) {
     return true;
 
     GGML_UNUSED(buft);
@@ -545,20 +545,20 @@ struct ggml_backend_cpu_context {
     size_t work_size;
 };
 
-static const char * ggml_backend_cpu_name(ggml_backend_t backend) {
+GGML_BACKEND_ABI static const char * ggml_backend_cpu_name(ggml_backend_t backend) {
     return "CPU";
 
     GGML_UNUSED(backend);
 }
 
-static void ggml_backend_cpu_free(ggml_backend_t backend) {
+GGML_BACKEND_ABI static void ggml_backend_cpu_free(ggml_backend_t backend) {
     struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
     free(cpu_ctx->work_data);
     free(cpu_ctx);
     free(backend);
 }
 
-static ggml_backend_buffer_type_t ggml_backend_cpu_get_default_buffer_type(ggml_backend_t backend) {
+GGML_BACKEND_ABI static ggml_backend_buffer_type_t ggml_backend_cpu_get_default_buffer_type(ggml_backend_t backend) {
     return ggml_backend_cpu_buffer_type();
 
     GGML_UNUSED(backend);
@@ -569,7 +569,7 @@ struct ggml_backend_plan_cpu {
     struct ggml_cgraph cgraph;
 };
 
-static ggml_backend_graph_plan_t ggml_backend_cpu_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+GGML_BACKEND_ABI static ggml_backend_graph_plan_t ggml_backend_cpu_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
     struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
 
     struct ggml_backend_plan_cpu * cpu_plan = malloc(sizeof(struct ggml_backend_plan_cpu));
@@ -584,7 +584,7 @@ static ggml_backend_graph_plan_t ggml_backend_cpu_graph_plan_create(ggml_backend
     return cpu_plan;
 }
 
-static void ggml_backend_cpu_graph_plan_free(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
+GGML_BACKEND_ABI static void ggml_backend_cpu_graph_plan_free(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
     struct ggml_backend_plan_cpu * cpu_plan = (struct ggml_backend_plan_cpu *)plan;
 
     free(cpu_plan->cplan.work_data);
@@ -593,15 +593,15 @@ static void ggml_backend_cpu_graph_plan_free(ggml_backend_t backend, ggml_backen
     GGML_UNUSED(backend);
 }
 
-static void ggml_backend_cpu_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
+GGML_BACKEND_ABI static void ggml_backend_cpu_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
     struct ggml_backend_plan_cpu * cpu_plan = (struct ggml_backend_plan_cpu *)plan;
 
     ggml_graph_compute(&cpu_plan->cgraph, &cpu_plan->cplan);
 
     GGML_UNUSED(backend);
 }
 
-static void ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
+GGML_BACKEND_ABI static void ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
     struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
 
     struct ggml_cplan cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads);
@@ -617,7 +617,7 @@ static void ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_c
     ggml_graph_compute(cgraph, &cplan);
 }
 
-static bool ggml_backend_cpu_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
+GGML_BACKEND_ABI static bool ggml_backend_cpu_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
     switch (op->op) {
         case GGML_OP_MUL_MAT:
             return op->src[1]->type == GGML_TYPE_F32 || op->src[1]->type == ggml_internal_get_type_traits(op->src[0]->type).vec_dot_type;
8 changes: 7 additions & 1 deletion llama.cpp/ggml-backend.h
@@ -3,6 +3,12 @@
 #include "ggml.h"
 #include "ggml-alloc.h"
 
+#if defined(_WIN32)
+#define GGML_BACKEND_ABI
+#else
+#define GGML_BACKEND_ABI __attribute__((__ms_abi__))
+#endif
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -173,7 +179,7 @@ extern "C" {
 GGML_API struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph);
 GGML_API void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy);
 
-typedef bool (*ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
+typedef bool (*GGML_BACKEND_ABI ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
 
 // Compare the output of two backends
 GGML_API void ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
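
One practical consequence of the annotated typedef above: a caller that passes its own callback to ggml_backend_compare_graph_backend now has to declare that callback with GGML_BACKEND_ABI so its type matches ggml_backend_eval_callback. A rough caller-side sketch follows; the function name and body are illustrative and not part of this commit, only the annotation and parameter list are dictated by the header.

    #include <stdbool.h>
    #include <stdio.h>
    #include "ggml-backend.h"

    // The GGML_BACKEND_ABI annotation and the signature come from the typedef in
    // ggml-backend.h; the body here is made up for illustration.
    GGML_BACKEND_ABI bool my_eval_callback(int node_index, struct ggml_tensor * t1,
                                           struct ggml_tensor * t2, void * user_data) {
        (void)t1; (void)t2; (void)user_data;
        fprintf(stderr, "comparing node %d\n", node_index);
        return true; // keep the comparison going
    }

    // With two initialized backends and a graph, the comparison could then be driven by:
    //   ggml_backend_compare_graph_backend(backend1, backend2, graph, my_eval_callback, NULL);
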