diff --git a/ggml-cuda.cu b/ggml-cuda.cu index 368b023497c49..b2d5e916e6739 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -1641,8 +1641,8 @@ template static __device__ __forceinline__ float vec_dot_q8_1_q8_1_imp #else const float2 dm8f = __half22float2(dm8); const float2 ds8f = __half22float2(ds8); - const float d8d8 = dm8f.x * ds8f.x; - const float m8s8 = dm8f.y * ds8f.y; + const float d8d8 = __low2float(dm8) * __low2float(ds8); /* product of the low halves of dm8 and ds8, as float */ + const float m8s8 = __high2float(dm8) * __high2float(ds8); /* product of the high halves, as float. NOTE(review): dm8f/ds8f above look unused on this path after this change - confirm and drop them */ #endif // GGML_CUDA_F16 // scale second part of sum by QI8_1/ vdr to compensate for multiple threads adding it @@ -3281,7 +3281,7 @@ static __global__ void mul_mat_q( *dsi_dst = *dsi_src; } else { float * dfi_dst = (float *) dsi_dst; - *dfi_dst = (*dsi_src).x; + *dfi_dst = __low2float(*dsi_src); /* explicit half -> float conversion of the low half; does not rely on an implicit __half-to-float conversion */ } }