From 1a3e8ad6db149f87c7134c82c284949b61c73dd2 Mon Sep 17 00:00:00 2001
From: Aaron Miller
Date: Fri, 30 Jun 2023 16:08:37 -0700
Subject: [PATCH] release metal buffers when freeing metal context

---
 ggml-metal.m | 4 +++-
 llama.cpp    | 8 +++++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/ggml-metal.m b/ggml-metal.m
index 7551231b9cf32..fd69c41fe357d 100644
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -202,7 +202,9 @@ @implementation GGMLMetalClass
 
 void ggml_metal_free(struct ggml_metal_context * ctx) {
     fprintf(stderr, "%s: deallocating\n", __func__);
-
+    for (int i = 0; i < ctx->n_buffers; ++i) {
+        [ctx->buffers[i].metal release];
+    }
     free(ctx);
 }
 
diff --git a/llama.cpp b/llama.cpp
index 049f73e4449cd..14421cb76a198 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -253,7 +253,13 @@ struct llama_model {
 
 struct llama_context {
     llama_context(const llama_model & model, const llama_vocab & vocab) : model(model), vocab(vocab), t_load_us(model.t_load_us), t_start_us(model.t_start_us) {}
-
+#ifdef GGML_USE_METAL
+    ~llama_context() {
+        if (ctx_metal) {
+            ggml_metal_free(ctx_metal);
+        }
+    }
+#endif
     std::mt19937 rng;
     bool has_evaluated_once = false;
 