From 4b93970c72d5953f24ad06fe35c2bb855a928611 Mon Sep 17 00:00:00 2001
From: Ycros <18012+ycros@users.noreply.github.com>
Date: Mon, 29 Jan 2024 11:36:22 +0000
Subject: [PATCH] Convert f32 tensors to f16 as well.

---
 llama.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/llama.cpp b/llama.cpp
index e1e543aedbb94..4a67016c56d82 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -9539,10 +9539,16 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
             quantize = true;
         }
 
+        enum ggml_type new_type;
         void * new_data;
         size_t new_size;
 
+        if (tensor->type == GGML_TYPE_F32) {
+            quantize = true;
+            new_type = GGML_TYPE_F16;
+        }
+
         if (quantize) {
             new_type = quantized_type;
             if (!params->pure) {
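
Note: the net effect of the new branch is that f32 tensors now take the same
write-out path as quantized tensors, with GGML_TYPE_F16 as their target type.
The sketch below illustrates the element-wise fp32 -> fp16 conversion such a
tensor undergoes downstream. It is a standalone example built on ggml's
ggml_fp32_to_fp16_row helper, not the actual code inside
llama_model_quantize_internal; the wrapper name f32_to_f16 is made up for
illustration.

    #include <vector>
    #include "ggml.h"

    // Illustrative only: mimics what the quantize loop does once
    // new_type == GGML_TYPE_F16 -- a plain element-wise cast of the
    // tensor data from fp32 to fp16, halving its size on disk.
    static std::vector<ggml_fp16_t> f32_to_f16(const float * src, int n) {
        std::vector<ggml_fp16_t> dst(n);
        ggml_fp32_to_fp16_row(src, dst.data(), n); // ggml's row converter
        return dst;
    }

For example, an f32 tensor holding ne elements would be converted with
f32_to_f16(data, ne) and then written out tagged as GGML_TYPE_F16.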