Convert f32 tensors to f16 as well.
ycros committed Jan 29, 2024
1 parent 4970f35 commit 4b93970
Showing 1 changed file with 6 additions and 0 deletions.
llama.cpp
@@ -9539,10 +9539,16 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
             quantize = true;
         }
 
+
         enum ggml_type new_type;
         void * new_data;
         size_t new_size;
 
+        if (tensor->type == GGML_TYPE_F32) {
+            quantize = true;
+            new_type = GGML_TYPE_F16;
+        }
+
         if (quantize) {
             new_type = quantized_type;
             if (!params->pure) {
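A minimal, self-contained sketch of the type-selection flow this hunk modifies, assuming the surrounding code matches the context lines shown above. The trimmed ggml_type enum, the pick_type helper, and main are hypothetical scaffolding for illustration, not llama.cpp's actual API; only the two if statements mirror the diff.

#include <cstdio>

// Trimmed, hypothetical stand-in for ggml's tensor type enum.
enum ggml_type { GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_Q4_K };

// Hypothetical helper mirroring the control flow in the hunk: decide the output
// type for one tensor from the quantizer's earlier quantize/no-quantize decision
// and the requested quantized type.
static ggml_type pick_type(ggml_type tensor_type, bool quantize, ggml_type quantized_type) {
    ggml_type new_type = tensor_type; // default: keep the tensor's current type

    // The change in this commit: f32 tensors are no longer left as-is; they are
    // marked for conversion and default to f16.
    if (tensor_type == GGML_TYPE_F32) {
        quantize = true;
        new_type = GGML_TYPE_F16;
    }

    // Pre-existing path from the context lines: tensors marked for quantization
    // take the requested quantized type (the real function may refine this further,
    // e.g. when !params->pure, which is not modeled here).
    if (quantize) {
        new_type = quantized_type;
    }

    return new_type;
}

int main() {
    // When converting a model to f16, an f32 tensor that was not otherwise
    // selected for quantization now comes out as f16 instead of staying f32.
    printf("f32 -> %d\n", pick_type(GGML_TYPE_F32, /*quantize=*/false, GGML_TYPE_F16));
    printf("f16 -> %d\n", pick_type(GGML_TYPE_F16, /*quantize=*/false, GGML_TYPE_F16));
    return 0;
}

In short, with this commit a float32 tensor that would previously have been copied through untouched is at least down-converted to float16, and takes the requested quantized type whenever the quantize branch applies.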
