diff --git a/src/layer/arm/convolution_arm.cpp b/src/layer/arm/convolution_arm.cpp
index 4d379f65eae..b22b3bb08fc 100644
--- a/src/layer/arm/convolution_arm.cpp
+++ b/src/layer/arm/convolution_arm.cpp
@@ -1376,15 +1376,22 @@ int Convolution_arm::forward_int8_arm(const Mat& bottom_blob, Mat& top_blob, con
 #if __ARM_NEON
     if (opt.use_packing_layout)
     {
-#if NCNN_ARM82
-        if (ncnn::cpu_support_arm_asimdhp() && opt.use_fp16_arithmetic)
+        if (use_int8_requantize)
         {
-            out_elempack_int32 = num_output % 8 == 0 ? 8 : num_output % 4 == 0 ? 4 : 1;
+            out_elempack_int32 = num_output % 8 == 0 ? 8 : 1;
         }
         else
-#endif // NCNN_ARM82
         {
-            out_elempack_int32 = num_output % 4 == 0 ? 4 : 1;
+#if NCNN_ARM82
+            if (ncnn::cpu_support_arm_asimdhp() && opt.use_fp16_arithmetic)
+            {
+                out_elempack_int32 = num_output % 8 == 0 ? 8 : num_output % 4 == 0 ? 4 : 1;
+            }
+            else
+#endif // NCNN_ARM82
+            {
+                out_elempack_int32 = num_output % 4 == 0 ? 4 : 1;
+            }
         }
     }
 #endif // __ARM_NEON
diff --git a/src/layer/arm/requantize_arm.cpp b/src/layer/arm/requantize_arm.cpp
index d5fe92428b3..d610d512b57 100644
--- a/src/layer/arm/requantize_arm.cpp
+++ b/src/layer/arm/requantize_arm.cpp
@@ -120,8 +120,8 @@ static void requantize_relu(const int* intptr, signed char* ptr, const Mat& scal
     for (; i < size; i++)
     {
         float v = *intptr * scale;
+        if (v < 0) v = 0;
         *ptr = float2int8(v);
-        if (*ptr < 0) *ptr = 0;
         intptr++;
         ptr++;
     }
@@ -190,8 +190,8 @@ static void requantize_relu(const int* intptr, signed char* ptr, const Mat& scal
     for (; i < size; i++)
     {
         float v = *intptr * scale + bias;
+        if (v < 0) v = 0;
         *ptr = float2int8(v);
-        if (*ptr < 0) *ptr = 0;
         intptr++;
         ptr++;
     }
@@ -288,8 +288,8 @@ static void requantize_leakyrelu(const int* intptr, signed char* ptr, const Mat&
     for (; i < size; i++)
     {
         float v = *intptr * scale;
+        if (v < 0) v *= slope;
         *ptr = float2int8(v);
-        if (*ptr < 0) *ptr *= slope;
         intptr++;
         ptr++;
     }
@@ -358,8 +358,8 @@ static void requantize_leakyrelu(const int* intptr, signed char* ptr, const Mat&
     for (; i < size; i++)
     {
         float v = *intptr * scale + bias;
+        if (v < 0) v *= slope;
         *ptr = float2int8(v);
-        if (*ptr < 0) *ptr *= slope;
         intptr++;
         ptr++;
     }
diff --git a/src/layer/loongarch/requantize_loongarch.cpp b/src/layer/loongarch/requantize_loongarch.cpp
index 9b7f2130cf4..0085bf71785 100644
--- a/src/layer/loongarch/requantize_loongarch.cpp
+++ b/src/layer/loongarch/requantize_loongarch.cpp
@@ -120,8 +120,8 @@ static void requantize_relu(const int* intptr, signed char* ptr, const Mat& scal
     for (; i < size; i++)
     {
         float v = *intptr * scale;
+        if (v < 0) v = 0;
         *ptr = float2int8(v);
-        if (*ptr < 0) *ptr = 0;
         intptr++;
         ptr++;
     }
@@ -182,8 +182,8 @@ static void requantize_relu(const int* intptr, signed char* ptr, const Mat& scal
     for (; i < size; i++)
     {
         float v = *intptr * scale + bias;
+        if (v < 0) v = 0;
         *ptr = float2int8(v);
-        if (*ptr < 0) *ptr = 0;
         intptr++;
         ptr++;
     }
@@ -281,8 +281,8 @@ static void requantize_leakyrelu(const int* intptr, signed char* ptr, const Mat&
     for (; i < size; i++)
     {
         float v = *intptr * scale;
+        if (v < 0) v *= slope;
         *ptr = float2int8(v);
-        if (*ptr < 0) *ptr *= slope;
         intptr++;
         ptr++;
     }
@@ -343,8 +343,8 @@ static void requantize_leakyrelu(const int* intptr, signed char* ptr, const Mat&
     for (; i < size; i++)
     {
         float v = *intptr * scale + bias;
+        if (v < 0) v *= slope;
         *ptr = float2int8(v);
-        if (*ptr < 0) *ptr *= slope;
         intptr++;
         ptr++;
     }
diff --git a/src/layer/mips/requantize_mips.cpp b/src/layer/mips/requantize_mips.cpp
index 9c502362d3a..374229fc54b 100644
--- a/src/layer/mips/requantize_mips.cpp
+++ b/src/layer/mips/requantize_mips.cpp
@@ -120,8 +120,8 @@ static void requantize_relu(const int* intptr, signed char* ptr, const Mat& scal
     for (; i < size; i++)
     {
         float v = *intptr * scale;
+        if (v < 0) v = 0;
         *ptr = float2int8(v);
-        if (*ptr < 0) *ptr = 0;
         intptr++;
         ptr++;
     }
@@ -182,8 +182,8 @@ static void requantize_relu(const int* intptr, signed char* ptr, const Mat& scal
     for (; i < size; i++)
    {
         float v = *intptr * scale + bias;
+        if (v < 0) v = 0;
         *ptr = float2int8(v);
-        if (*ptr < 0) *ptr = 0;
         intptr++;
         ptr++;
     }
@@ -281,8 +281,8 @@ static void requantize_leakyrelu(const int* intptr, signed char* ptr, const Mat&
     for (; i < size; i++)
     {
         float v = *intptr * scale;
+        if (v < 0) v *= slope;
         *ptr = float2int8(v);
-        if (*ptr < 0) *ptr *= slope;
         intptr++;
         ptr++;
     }
@@ -343,8 +343,8 @@ static void requantize_leakyrelu(const int* intptr, signed char* ptr, const Mat&
     for (; i < size; i++)
     {
         float v = *intptr * scale + bias;
+        if (v < 0) v *= slope;
         *ptr = float2int8(v);
-        if (*ptr < 0) *ptr *= slope;
         intptr++;
         ptr++;
     }
diff --git a/src/layer/x86/convolution_x86.cpp b/src/layer/x86/convolution_x86.cpp
index 528bd5fae75..dfb7f6bdd44 100644
--- a/src/layer/x86/convolution_x86.cpp
+++ b/src/layer/x86/convolution_x86.cpp
@@ -995,7 +995,11 @@ int Convolution_x86::forward_int8_x86(const Mat& bottom_blob, Mat& top_blob, con
     {
         if (use_int8_requantize)
         {
+#if __AVX__
             out_elempack_int32 = num_output % 8 == 0 ? 8 : 1;
+#else
+            out_elempack_int32 = num_output % 8 == 0 ? 4 : 1;
+#endif
         }
         else
         {
diff --git a/src/layer/x86/requantize_x86.cpp b/src/layer/x86/requantize_x86.cpp
index 6b64f86967d..a2e7385dac1 100644
--- a/src/layer/x86/requantize_x86.cpp
+++ b/src/layer/x86/requantize_x86.cpp
@@ -330,6 +330,82 @@ static void requantize(const int* intptr, signed char* ptr, const Mat& scale_in_
     }
 }
 
+#if __SSE2__
+#if !__AVX__
+static void requantize_pack4to8(const int* intptr0, const int* intptr1, signed char* ptr, const Mat& scale_in_data, const Mat& bias_data, const Mat& scale_out_data, int activation_type, const Mat& activation_params, int elemcount)
+{
+    const int scale_in_data_size = scale_in_data.w;
+    const int bias_data_size = bias_data.w;
+    const int scale_out_data_size = scale_out_data.w;
+
+    // NCNN_LOGE("requantize_pack4to8 %d %d %d %d", scale_in_data_size, bias_data_size, scale_out_data_size, elemcount);
+
+    __m128 _scale_in0 = _mm_set1_ps(scale_in_data[0]);
+    __m128 _scale_in1 = _scale_in0;
+    if (scale_in_data_size > 1)
+    {
+        _scale_in0 = _mm_loadu_ps((const float*)scale_in_data);
+        _scale_in1 = _mm_loadu_ps((const float*)scale_in_data + 4);
+    }
+
+    __m128 _scale_out0 = _mm_set1_ps(scale_out_data[0]);
+    __m128 _scale_out1 = _scale_out0;
+    if (scale_out_data_size > 1)
+    {
+        _scale_out0 = _mm_loadu_ps((const float*)scale_out_data);
+        _scale_out1 = _mm_loadu_ps((const float*)scale_out_data + 4);
+    }
+
+    if (bias_data_size == 0)
+    {
+        int i = 0;
+        for (; i < elemcount; i++)
+        {
+            __m128 _v0 = _mm_cvtepi32_ps(_mm_loadu_si128((const __m128i*)intptr0));
+            __m128 _v1 = _mm_cvtepi32_ps(_mm_loadu_si128((const __m128i*)intptr1));
+            _v0 = _mm_mul_ps(_v0, _scale_in0);
+            _v1 = _mm_mul_ps(_v1, _scale_in1);
+            _v0 = activation_sse(_v0, activation_type, activation_params);
+            _v1 = activation_sse(_v1, activation_type, activation_params);
+            _v0 = _mm_mul_ps(_v0, _scale_out0);
+            _v1 = _mm_mul_ps(_v1, _scale_out1);
+            *(int64_t*)ptr = float2int8_sse(_v0, _v1);
+            intptr0 += 4;
+            intptr1 += 4;
+            ptr += 8;
+        }
+    }
+    else
+    {
+        __m128 _bias0 = _mm_set1_ps(bias_data[0]);
+        __m128 _bias1 = _bias0;
+        if (bias_data_size > 1)
+        {
+            _bias0 = _mm_loadu_ps((const float*)bias_data);
+            _bias1 = _mm_loadu_ps((const float*)bias_data + 4);
+        }
+
+        int i = 0;
+        for (; i < elemcount; i++)
+        {
+            __m128 _v0 = _mm_cvtepi32_ps(_mm_loadu_si128((const __m128i*)intptr0));
+            __m128 _v1 = _mm_cvtepi32_ps(_mm_loadu_si128((const __m128i*)intptr1));
+            _v0 = _mm_comp_fmadd_ps(_v0, _scale_in0, _bias0);
+            _v1 = _mm_comp_fmadd_ps(_v1, _scale_in1, _bias1);
+            _v0 = activation_sse(_v0, activation_type, activation_params);
+            _v1 = activation_sse(_v1, activation_type, activation_params);
+            _v0 = _mm_mul_ps(_v0, _scale_out0);
+            _v1 = _mm_mul_ps(_v1, _scale_out1);
+            *(int64_t*)ptr = float2int8_sse(_v0, _v1);
+            intptr0 += 4;
+            intptr1 += 4;
+            ptr += 8;
+        }
+    }
+}
+#endif // !__AVX__
+#endif // __SSE2__
+
 int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
 {
     const int dims = bottom_blob.dims;
@@ -337,11 +413,20 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
     const int h = bottom_blob.h;
     const int channels = bottom_blob.c;
     const int elempack = bottom_blob.elempack;
-    const size_t out_elemsize = elempack * 1u;
 
     if (dims == 1)
     {
-        top_blob.create(w, out_elemsize, elempack, opt.blob_allocator);
+        int out_elempack = 1;
+#if __SSE2__
+        if (opt.use_packing_layout)
+        {
+            out_elempack = w * elempack % 8 == 0 ? 8 : 1;
+        }
+#endif
+        const int outw = w * elempack / out_elempack;
+        const size_t out_elemsize = out_elempack * 1u;
+
+        top_blob.create(outw, out_elemsize, out_elempack, opt.blob_allocator);
         if (top_blob.empty())
             return -100;
 
@@ -368,41 +453,107 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
 
     if (dims == 2)
     {
-        top_blob.create(w, h, out_elemsize, elempack, opt.blob_allocator);
+        int out_elempack = 1;
+#if __SSE2__
+        if (opt.use_packing_layout)
+        {
+            out_elempack = h * elempack % 8 == 0 ? 8 : 1;
+        }
+#endif
+        const int outh = h * elempack / out_elempack;
+        const size_t out_elemsize = out_elempack * 1u;
+
+        top_blob.create(w, outh, out_elemsize, out_elempack, opt.blob_allocator);
         if (top_blob.empty())
             return -100;
 
-        #pragma omp parallel for num_threads(opt.num_threads)
-        for (int i = 0; i < h; i++)
+#if __SSE2__
+#if !__AVX__
+        if (elempack == 4 && out_elempack == 8)
         {
-            const int* intptr = bottom_blob.row<const int>(i);
-            signed char* ptr = top_blob.row<signed char>(i);
+            #pragma omp parallel for num_threads(opt.num_threads)
+            for (int i = 0; i < outh; i++)
+            {
+                const int* intptr0 = bottom_blob.row<const int>(i * 2);
+                const int* intptr1 = bottom_blob.row<const int>(i * 2 + 1);
+                signed char* ptr = top_blob.row<signed char>(i);
 
-            const Mat scale_in_data_i = scale_in_data_size > 1 ? scale_in_data.range(i * elempack, elempack) : scale_in_data;
-            const Mat bias_data_i = bias_data_size > 1 ? bias_data.range(i * elempack, elempack) : bias_data;
-            const Mat scale_out_data_i = scale_out_data_size > 1 ? scale_out_data.range(i * elempack, elempack) : scale_out_data;
+                const Mat scale_in_data_i = scale_in_data_size > 1 ? scale_in_data.range(i * out_elempack, out_elempack) : scale_in_data;
+                const Mat bias_data_i = bias_data_size > 1 ? bias_data.range(i * out_elempack, out_elempack) : bias_data;
+                const Mat scale_out_data_i = scale_out_data_size > 1 ? scale_out_data.range(i * out_elempack, out_elempack) : scale_out_data;
 
-            requantize(intptr, ptr, scale_in_data_i, bias_data_i, scale_out_data_i, activation_type, activation_params, w, elempack);
+                requantize_pack4to8(intptr0, intptr1, ptr, scale_in_data_i, bias_data_i, scale_out_data_i, activation_type, activation_params, w);
+            }
+        }
+#endif // !__AVX__
+#endif // __SSE2__
+        if (elempack == out_elempack)
+        {
+            #pragma omp parallel for num_threads(opt.num_threads)
+            for (int i = 0; i < h; i++)
+            {
+                const int* intptr = bottom_blob.row<const int>(i);
+                signed char* ptr = top_blob.row<signed char>(i);
+
+                const Mat scale_in_data_i = scale_in_data_size > 1 ? scale_in_data.range(i * elempack, elempack) : scale_in_data;
+                const Mat bias_data_i = bias_data_size > 1 ? bias_data.range(i * elempack, elempack) : bias_data;
+                const Mat scale_out_data_i = scale_out_data_size > 1 ? scale_out_data.range(i * elempack, elempack) : scale_out_data;
+
+                requantize(intptr, ptr, scale_in_data_i, bias_data_i, scale_out_data_i, activation_type, activation_params, w, elempack);
+            }
         }
     }
 
     if (dims == 3)
     {
-        top_blob.create(w, h, channels, out_elemsize, elempack, opt.blob_allocator);
+        int out_elempack = 1;
+#if __SSE2__
+        if (opt.use_packing_layout)
+        {
+            out_elempack = channels * elempack % 8 == 0 ? 8 : 1;
+        }
+#endif
+        const int outc = channels * elempack / out_elempack;
+        const size_t out_elemsize = out_elempack * 1u;
+
+        top_blob.create(w, h, outc, out_elemsize, out_elempack, opt.blob_allocator);
         if (top_blob.empty())
             return -100;
 
-        #pragma omp parallel for num_threads(opt.num_threads)
-        for (int q = 0; q < channels; q++)
+#if __SSE2__
+#if !__AVX__
+        if (elempack == 4 && out_elempack == 8)
+        {
+            #pragma omp parallel for num_threads(opt.num_threads)
+            for (int q = 0; q < outc; q++)
+            {
+                const int* intptr0 = bottom_blob.channel(q * 2);
+                const int* intptr1 = bottom_blob.channel(q * 2 + 1);
+                signed char* ptr = top_blob.channel(q);
+
+                const Mat scale_in_data_q = scale_in_data_size > 1 ? scale_in_data.range(q * out_elempack, out_elempack) : scale_in_data;
+                const Mat bias_data_q = bias_data_size > 1 ? bias_data.range(q * out_elempack, out_elempack) : bias_data;
+                const Mat scale_out_data_q = scale_out_data_size > 1 ? scale_out_data.range(q * out_elempack, out_elempack) : scale_out_data;
+
+                requantize_pack4to8(intptr0, intptr1, ptr, scale_in_data_q, bias_data_q, scale_out_data_q, activation_type, activation_params, w * h);
+            }
+        }
+#endif // !__AVX__
+#endif // __SSE2__
+        if (elempack == out_elempack)
         {
-            const int* intptr = bottom_blob.channel(q);
-            signed char* ptr = top_blob.channel(q);
+            #pragma omp parallel for num_threads(opt.num_threads)
+            for (int q = 0; q < channels; q++)
+            {
+                const int* intptr = bottom_blob.channel(q);
+                signed char* ptr = top_blob.channel(q);
 
-            const Mat scale_in_data_q = scale_in_data_size > 1 ? scale_in_data.range(q * elempack, elempack) : scale_in_data;
-            const Mat bias_data_q = bias_data_size > 1 ? bias_data.range(q * elempack, elempack) : bias_data;
-            const Mat scale_out_data_q = scale_out_data_size > 1 ? scale_out_data.range(q * elempack, elempack) : scale_out_data;
+                const Mat scale_in_data_q = scale_in_data_size > 1 ? scale_in_data.range(q * elempack, elempack) : scale_in_data;
+                const Mat bias_data_q = bias_data_size > 1 ? bias_data.range(q * elempack, elempack) : bias_data;
+                const Mat scale_out_data_q = scale_out_data_size > 1 ? scale_out_data.range(q * elempack, elempack) : scale_out_data;
 
-            requantize(intptr, ptr, scale_in_data_q, bias_data_q, scale_out_data_q, activation_type, activation_params, w * h, elempack);
+                requantize(intptr, ptr, scale_in_data_q, bias_data_q, scale_out_data_q, activation_type, activation_params, w * h, elempack);
+            }
         }
     }
 
diff --git a/tests/test_requantize.cpp b/tests/test_requantize.cpp
index 3e4fe148828..e9c37d739f7 100644
--- a/tests/test_requantize.cpp
+++ b/tests/test_requantize.cpp
@@ -14,7 +14,7 @@
 
 #include "testutil.h"
 
-static int test_requantize(const ncnn::Mat& a, int scale_in_data_size, int scale_out_data_size, int bias_data_size, int activation_type, float alpha, float beta)
+static int test_requantize_pack1(const ncnn::Mat& a, int scale_in_data_size, int scale_out_data_size, int bias_data_size, int activation_type, float alpha, float beta)
 {
     ncnn::ParamDict pd;
     pd.set(0, scale_in_data_size);
@@ -36,25 +36,25 @@ static int test_requantize(const ncnn::Mat& a, int scale_in_data_size, int scale
     Randomize(weights[0], 0.0001, 0.001);
     Randomize(weights[1], 10, 100);
 
-    int flag = TEST_LAYER_DISABLE_AUTO_INPUT_CASTING;
+    int flag = TEST_LAYER_DISABLE_AUTO_INPUT_CASTING | TEST_LAYER_DISABLE_AUTO_INPUT_PACKING;
     int ret = test_layer("Requantize", pd, weights, a, 1, 0, flag);
     if (ret != 0)
     {
-        fprintf(stderr, "test_requantize failed a.dims=%d a=(%d %d %d) scale_in_data_size=%d scale_out_data_size=%d bias_data_size=%d act=%d actparams=[%f,%f]\n", a.dims, a.w, a.h, a.c, scale_in_data_size, scale_out_data_size, bias_data_size, activation_type, activation_params[0], activation_params[1]);
+        fprintf(stderr, "test_requantize_pack1 failed a.dims=%d a=(%d %d %d) scale_in_data_size=%d scale_out_data_size=%d bias_data_size=%d act=%d actparams=[%f,%f]\n", a.dims, a.w, a.h, a.c, scale_in_data_size, scale_out_data_size, bias_data_size, activation_type, activation_params[0], activation_params[1]);
     }
 
     return ret;
 }
 
-static int test_requantize(const ncnn::Mat& a, int scale_in_data_size, int scale_out_data_size, int bias_data_size)
+static int test_requantize_pack1(const ncnn::Mat& a, int scale_in_data_size, int scale_out_data_size, int bias_data_size)
 {
     return 0
-           || test_requantize(a, scale_in_data_size, scale_out_data_size, bias_data_size, 0, 0.f, 0.f)
-           || test_requantize(a, scale_in_data_size, scale_out_data_size, bias_data_size, 1, 0.f, 0.f)
-           || test_requantize(a, scale_in_data_size, scale_out_data_size, bias_data_size, 2, RandomFloat(0, 1), 0.f)
-           || test_requantize(a, scale_in_data_size, scale_out_data_size, bias_data_size, 3, RandomFloat(-1, 0), RandomFloat(0, 1))
-           || test_requantize(a, scale_in_data_size, scale_out_data_size, bias_data_size, 4, 0.f, 0.f)
-           || test_requantize(a, scale_in_data_size, scale_out_data_size, bias_data_size, 5, 0.f, 0.f);
+           || test_requantize_pack1(a, scale_in_data_size, scale_out_data_size, bias_data_size, 0, 0.f, 0.f)
+           || test_requantize_pack1(a, scale_in_data_size, scale_out_data_size, bias_data_size, 1, 0.f, 0.f)
+           || test_requantize_pack1(a, scale_in_data_size, scale_out_data_size, bias_data_size, 2, RandomFloat(0, 1), 0.f)
+           || test_requantize_pack1(a, scale_in_data_size, scale_out_data_size, bias_data_size, 3, RandomFloat(-1, 0), RandomFloat(0, 1))
+           || test_requantize_pack1(a, scale_in_data_size, scale_out_data_size, bias_data_size, 4, 0.f, 0.f)
+           || test_requantize_pack1(a, scale_in_data_size, scale_out_data_size, bias_data_size, 5, 0.f, 0.f);
 }
 
 static int test_requantize_pack8(const ncnn::Mat& a, int scale_in_data_size, int scale_out_data_size, int bias_data_size, int activation_type, float alpha, float beta)
@@ -103,94 +103,68 @@ static int test_requantize_pack8(const ncnn::Mat& a, int scale_in_data_size, int
 static int test_requantize_0()
 {
     return 0
-           || test_requantize(RandomIntMat(5, 7, 24), 1, 1, 24)
-           || test_requantize(RandomIntMat(5, 7, 24), 1, 1, 1)
-           || test_requantize(RandomIntMat(5, 7, 24), 1, 1, 0)
-           || test_requantize(RandomIntMat(5, 7, 24), 24, 24, 24)
-           || test_requantize(RandomIntMat(5, 7, 24), 24, 24, 1)
-           || test_requantize(RandomIntMat(5, 7, 24), 24, 24, 0)
-           || test_requantize(RandomIntMat(5, 7, 24), 1, 24, 24)
-           || test_requantize(RandomIntMat(5, 7, 24), 1, 24, 1)
-           || test_requantize(RandomIntMat(5, 7, 24), 1, 24, 0)
-           || test_requantize(RandomIntMat(5, 7, 24), 24, 1, 24)
-           || test_requantize(RandomIntMat(5, 7, 24), 24, 1, 1)
-           || test_requantize(RandomIntMat(5, 7, 24), 24, 1, 0)
-           || test_requantize(RandomIntMat(7, 9, 12), 1, 1, 12)
-           || test_requantize(RandomIntMat(7, 9, 12), 1, 1, 1)
-           || test_requantize(RandomIntMat(7, 9, 12), 1, 1, 0)
-           || test_requantize(RandomIntMat(7, 9, 12), 12, 12, 12)
-           || test_requantize(RandomIntMat(7, 9, 12), 12, 12, 1)
-           || test_requantize(RandomIntMat(7, 9, 12), 12, 12, 0)
-           || test_requantize(RandomIntMat(7, 9, 12), 1, 12, 12)
-           || test_requantize(RandomIntMat(7, 9, 12), 1, 12, 1)
-           || test_requantize(RandomIntMat(7, 9, 12), 1, 12, 0)
-           || test_requantize(RandomIntMat(7, 9, 12), 12, 1, 12)
-           || test_requantize(RandomIntMat(7, 9, 12), 12, 1, 1)
-           || test_requantize(RandomIntMat(7, 9, 12), 12, 1, 0)
-           || test_requantize(RandomIntMat(3, 5, 13), 1, 1, 13)
-           || test_requantize(RandomIntMat(3, 5, 13), 1, 1, 1)
-           || test_requantize(RandomIntMat(3, 5, 13), 1, 1, 0)
-           || test_requantize(RandomIntMat(3, 5, 13), 13, 13, 13)
-           || test_requantize(RandomIntMat(3, 5, 13), 13, 13, 1)
-           || test_requantize(RandomIntMat(3, 5, 13), 13, 13, 0)
-           || test_requantize(RandomIntMat(3, 5, 13), 1, 13, 13)
-           || test_requantize(RandomIntMat(3, 5, 13), 1, 13, 1)
-           || test_requantize(RandomIntMat(3, 5, 13), 1, 13, 0)
-           || test_requantize(RandomIntMat(3, 5, 13), 13, 1, 13)
-           || test_requantize(RandomIntMat(3, 5, 13), 13, 1, 1)
-           || test_requantize(RandomIntMat(3, 5, 13), 13, 1, 0);
+           || test_requantize_pack1(RandomIntMat(7, 9, 12), 1, 1, 12)
+           || test_requantize_pack1(RandomIntMat(7, 9, 12), 1, 1, 1)
+           || test_requantize_pack1(RandomIntMat(7, 9, 12), 1, 1, 0)
+           || test_requantize_pack1(RandomIntMat(7, 9, 12), 12, 12, 12)
+           || test_requantize_pack1(RandomIntMat(7, 9, 12), 12, 12, 1)
+           || test_requantize_pack1(RandomIntMat(7, 9, 12), 12, 12, 0)
+           || test_requantize_pack1(RandomIntMat(7, 9, 12), 1, 12, 12)
+           || test_requantize_pack1(RandomIntMat(7, 9, 12), 1, 12, 1)
+           || test_requantize_pack1(RandomIntMat(7, 9, 12), 1, 12, 0)
+           || test_requantize_pack1(RandomIntMat(7, 9, 12), 12, 1, 12)
+           || test_requantize_pack1(RandomIntMat(7, 9, 12), 12, 1, 1)
+           || test_requantize_pack1(RandomIntMat(7, 9, 12), 12, 1, 0)
+           || test_requantize_pack1(RandomIntMat(3, 5, 13), 1, 1, 13)
+           || test_requantize_pack1(RandomIntMat(3, 5, 13), 1, 1, 1)
+           || test_requantize_pack1(RandomIntMat(3, 5, 13), 1, 1, 0)
+           || test_requantize_pack1(RandomIntMat(3, 5, 13), 13, 13, 13)
+           || test_requantize_pack1(RandomIntMat(3, 5, 13), 13, 13, 1)
+           || test_requantize_pack1(RandomIntMat(3, 5, 13), 13, 13, 0)
+           || test_requantize_pack1(RandomIntMat(3, 5, 13), 1, 13, 13)
+           || test_requantize_pack1(RandomIntMat(3, 5, 13), 1, 13, 1)
+           || test_requantize_pack1(RandomIntMat(3, 5, 13), 1, 13, 0)
+           || test_requantize_pack1(RandomIntMat(3, 5, 13), 13, 1, 13)
+           || test_requantize_pack1(RandomIntMat(3, 5, 13), 13, 1, 1)
+           || test_requantize_pack1(RandomIntMat(3, 5, 13), 13, 1, 0);
 }
 
 static int test_requantize_1()
 {
     return 0
-           || test_requantize(RandomIntMat(15, 24), 1, 1, 24)
-           || test_requantize(RandomIntMat(15, 24), 1, 1, 1)
-           || test_requantize(RandomIntMat(15, 24), 1, 1, 0)
-           || test_requantize(RandomIntMat(15, 24), 24, 24, 24)
-           || test_requantize(RandomIntMat(15, 24), 24, 24, 1)
-           || test_requantize(RandomIntMat(15, 24), 24, 24, 0)
-           || test_requantize(RandomIntMat(15, 24), 1, 24, 24)
-           || test_requantize(RandomIntMat(15, 24), 1, 24, 1)
-           || test_requantize(RandomIntMat(15, 24), 1, 24, 0)
-           || test_requantize(RandomIntMat(15, 24), 24, 1, 24)
-           || test_requantize(RandomIntMat(15, 24), 24, 1, 1)
-           || test_requantize(RandomIntMat(15, 24), 24, 1, 0)
-           || test_requantize(RandomIntMat(17, 12), 1, 1, 12)
-           || test_requantize(RandomIntMat(17, 12), 1, 1, 1)
-           || test_requantize(RandomIntMat(17, 12), 1, 1, 0)
-           || test_requantize(RandomIntMat(17, 12), 12, 12, 12)
-           || test_requantize(RandomIntMat(17, 12), 12, 12, 1)
-           || test_requantize(RandomIntMat(17, 12), 12, 12, 0)
-           || test_requantize(RandomIntMat(17, 12), 1, 12, 12)
-           || test_requantize(RandomIntMat(17, 12), 1, 12, 1)
-           || test_requantize(RandomIntMat(17, 12), 1, 12, 0)
-           || test_requantize(RandomIntMat(17, 12), 12, 1, 12)
-           || test_requantize(RandomIntMat(17, 12), 12, 1, 1)
-           || test_requantize(RandomIntMat(17, 12), 12, 1, 0)
-           || test_requantize(RandomIntMat(19, 15), 1, 1, 15)
-           || test_requantize(RandomIntMat(19, 15), 1, 1, 1)
-           || test_requantize(RandomIntMat(19, 15), 1, 1, 0)
-           || test_requantize(RandomIntMat(19, 15), 15, 15, 15)
-           || test_requantize(RandomIntMat(19, 15), 15, 15, 1)
-           || test_requantize(RandomIntMat(19, 15), 15, 15, 0)
-           || test_requantize(RandomIntMat(19, 15), 1, 15, 15)
-           || test_requantize(RandomIntMat(19, 15), 1, 15, 1)
-           || test_requantize(RandomIntMat(19, 15), 1, 15, 0)
-           || test_requantize(RandomIntMat(19, 15), 15, 1, 15)
-           || test_requantize(RandomIntMat(19, 15), 15, 1, 1)
-           || test_requantize(RandomIntMat(19, 15), 15, 1, 0);
+           || test_requantize_pack1(RandomIntMat(17, 12), 1, 1, 12)
+           || test_requantize_pack1(RandomIntMat(17, 12), 1, 1, 1)
+           || test_requantize_pack1(RandomIntMat(17, 12), 1, 1, 0)
+           || test_requantize_pack1(RandomIntMat(17, 12), 12, 12, 12)
+           || test_requantize_pack1(RandomIntMat(17, 12), 12, 12, 1)
+           || test_requantize_pack1(RandomIntMat(17, 12), 12, 12, 0)
+           || test_requantize_pack1(RandomIntMat(17, 12), 1, 12, 12)
+           || test_requantize_pack1(RandomIntMat(17, 12), 1, 12, 1)
+           || test_requantize_pack1(RandomIntMat(17, 12), 1, 12, 0)
+           || test_requantize_pack1(RandomIntMat(17, 12), 12, 1, 12)
+           || test_requantize_pack1(RandomIntMat(17, 12), 12, 1, 1)
+           || test_requantize_pack1(RandomIntMat(17, 12), 12, 1, 0)
+           || test_requantize_pack1(RandomIntMat(19, 15), 1, 1, 15)
+           || test_requantize_pack1(RandomIntMat(19, 15), 1, 1, 1)
+           || test_requantize_pack1(RandomIntMat(19, 15), 1, 1, 0)
+           || test_requantize_pack1(RandomIntMat(19, 15), 15, 15, 15)
+           || test_requantize_pack1(RandomIntMat(19, 15), 15, 15, 1)
+           || test_requantize_pack1(RandomIntMat(19, 15), 15, 15, 0)
+           || test_requantize_pack1(RandomIntMat(19, 15), 1, 15, 15)
+           || test_requantize_pack1(RandomIntMat(19, 15), 1, 15, 1)
+           || test_requantize_pack1(RandomIntMat(19, 15), 1, 15, 0)
+           || test_requantize_pack1(RandomIntMat(19, 15), 15, 1, 15)
+           || test_requantize_pack1(RandomIntMat(19, 15), 15, 1, 1)
+           || test_requantize_pack1(RandomIntMat(19, 15), 15, 1, 0);
 }
 
 static int test_requantize_2()
 {
     return 0
-           || test_requantize(RandomIntMat(128), 1, 1, 1)
-           || test_requantize(RandomIntMat(128), 1, 1, 0)
-           || test_requantize(RandomIntMat(124), 1, 1, 1)
-           || test_requantize(RandomIntMat(124), 1, 1, 0)
-           || test_requantize(RandomIntMat(127), 1, 1, 1)
-           || test_requantize(RandomIntMat(127), 1, 1, 0);
+           || test_requantize_pack1(RandomIntMat(124), 1, 1, 1)
+           || test_requantize_pack1(RandomIntMat(124), 1, 1, 0)
+           || test_requantize_pack1(RandomIntMat(127), 1, 1, 1)
+           || test_requantize_pack1(RandomIntMat(127), 1, 1, 0);
 }
 
 static int test_requantize_3()
diff --git a/tests/test_requantize_oom.cpp b/tests/test_requantize_oom.cpp
new file mode 100644
index 00000000000..3b39ba16971
--- /dev/null
+++ b/tests/test_requantize_oom.cpp
@@ -0,0 +1,139 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#include "testutil.h"
+
+static int test_requantize_pack1_oom(const ncnn::Mat& a, int scale_in_data_size, int scale_out_data_size, int bias_data_size, int activation_type, float alpha, float beta)
+{
+    ncnn::ParamDict pd;
+    pd.set(0, scale_in_data_size);
+    pd.set(1, scale_out_data_size);
+    pd.set(2, bias_data_size);
+
+    ncnn::Mat activation_params(2);
+    activation_params[0] = alpha;
+    activation_params[1] = beta;
+    pd.set(3, activation_type);
+    pd.set(4, activation_params);
+
+    std::vector<ncnn::Mat> weights(bias_data_size ? 3 : 2);
+    weights[0] = RandomMat(scale_in_data_size);
+    weights[1] = RandomMat(scale_out_data_size);
+    if (bias_data_size)
+        weights[2] = RandomMat(bias_data_size);
+
+    Randomize(weights[0], 0.0001, 0.001);
+    Randomize(weights[1], 10, 100);
+
+    int flag = TEST_LAYER_DISABLE_AUTO_INPUT_CASTING | TEST_LAYER_DISABLE_AUTO_INPUT_PACKING;
+    int ret = test_layer_oom("Requantize", pd, weights, a, flag);
+    if (ret != 0)
+    {
+        fprintf(stderr, "test_requantize_pack1_oom failed a.dims=%d a=(%d %d %d) scale_in_data_size=%d scale_out_data_size=%d bias_data_size=%d act=%d actparams=[%f,%f]\n", a.dims, a.w, a.h, a.c, scale_in_data_size, scale_out_data_size, bias_data_size, activation_type, activation_params[0], activation_params[1]);
+    }
+
+    return ret;
+}
+
+static int test_requantize_pack1_oom(const ncnn::Mat& a, int scale_in_data_size, int scale_out_data_size, int bias_data_size)
+{
+    return 0
+           || test_requantize_pack1_oom(a, scale_in_data_size, scale_out_data_size, bias_data_size, 0, 0.f, 0.f)
+           || test_requantize_pack1_oom(a, scale_in_data_size, scale_out_data_size, bias_data_size, 1, 0.f, 0.f)
+           || test_requantize_pack1_oom(a, scale_in_data_size, scale_out_data_size, bias_data_size, 2, RandomFloat(0, 1), 0.f)
+           || test_requantize_pack1_oom(a, scale_in_data_size, scale_out_data_size, bias_data_size, 3, RandomFloat(-1, 0), RandomFloat(0, 1))
+           || test_requantize_pack1_oom(a, scale_in_data_size, scale_out_data_size, bias_data_size, 4, 0.f, 0.f)
+           || test_requantize_pack1_oom(a, scale_in_data_size, scale_out_data_size, bias_data_size, 5, 0.f, 0.f);
+}
+
+static int test_requantize_pack8_oom(const ncnn::Mat& a, int scale_in_data_size, int scale_out_data_size, int bias_data_size, int activation_type, float alpha, float beta)
+{
+    ncnn::ParamDict pd;
+    pd.set(0, scale_in_data_size);
+    pd.set(1, scale_out_data_size);
+    pd.set(2, bias_data_size);
+
+    ncnn::Mat activation_params(2);
+    activation_params[0] = alpha;
+    activation_params[1] = beta;
+    pd.set(3, activation_type);
+    pd.set(4, activation_params);
+
+    std::vector<ncnn::Mat> weights(bias_data_size ? 3 : 2);
+    weights[0] = RandomMat(scale_in_data_size);
+    weights[1] = RandomMat(scale_out_data_size);
+    if (bias_data_size)
+        weights[2] = RandomMat(bias_data_size);
+
+    Randomize(weights[0], 0.0001, 0.001);
+    Randomize(weights[1], 10, 100);
+
+    int flag = TEST_LAYER_DISABLE_AUTO_INPUT_CASTING | TEST_LAYER_ENABLE_FORCE_INPUT_PACK8;
+    int ret = test_layer_oom("Requantize", pd, weights, a, flag);
+    if (ret != 0)
+    {
+        fprintf(stderr, "test_requantize_pack8_oom failed a.dims=%d a=(%d %d %d) scale_in_data_size=%d scale_out_data_size=%d bias_data_size=%d act=%d actparams=[%f,%f]\n", a.dims, a.w, a.h, a.c, scale_in_data_size, scale_out_data_size, bias_data_size, activation_type, activation_params[0], activation_params[1]);
+    }
+
+    return ret;
+}
+
+static int test_requantize_pack8_oom(const ncnn::Mat& a, int scale_in_data_size, int scale_out_data_size, int bias_data_size)
+{
+    return 0
+           || test_requantize_pack8_oom(a, scale_in_data_size, scale_out_data_size, bias_data_size, 0, 0.f, 0.f)
+           || test_requantize_pack8_oom(a, scale_in_data_size, scale_out_data_size, bias_data_size, 1, 0.f, 0.f)
+           || test_requantize_pack8_oom(a, scale_in_data_size, scale_out_data_size, bias_data_size, 2, RandomFloat(0, 1), 0.f)
+           || test_requantize_pack8_oom(a, scale_in_data_size, scale_out_data_size, bias_data_size, 3, RandomFloat(-1, 0), RandomFloat(0, 1))
+           || test_requantize_pack8_oom(a, scale_in_data_size, scale_out_data_size, bias_data_size, 4, 0.f, 0.f)
+           || test_requantize_pack8_oom(a, scale_in_data_size, scale_out_data_size, bias_data_size, 5, 0.f, 0.f);
+}
+
+static int test_requantize_0()
+{
+    return 0
+           || test_requantize_pack1_oom(RandomIntMat(7, 9, 12), 12, 12, 12)
+           || test_requantize_pack1_oom(RandomIntMat(3, 5, 13), 13, 13, 13);
+}
+
+static int test_requantize_1()
+{
+    return 0
+           || test_requantize_pack1_oom(RandomIntMat(17, 12), 12, 12, 12)
+           || test_requantize_pack1_oom(RandomIntMat(19, 15), 15, 15, 15);
+}
+
+static int test_requantize_2()
+{
+    return test_requantize_pack1_oom(RandomIntMat(124), 1, 1, 1);
+}
+
+static int test_requantize_3()
+{
+    return 0
+           || test_requantize_pack8_oom(RandomIntMat(5, 7, 24), 24, 24, 24)
+           || test_requantize_pack8_oom(RandomIntMat(15, 24), 24, 24, 24)
+           || test_requantize_pack8_oom(RandomIntMat(128), 1, 1, 1);
+}
+
+int main()
+{
+    SRAND(7767517);
+
+    return 0
+           || test_requantize_0()
+           || test_requantize_1()
+           || test_requantize_2()
+           || test_requantize_3();
+}
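A note on the requantize_relu / requantize_leakyrelu edits above (arm, loongarch, mips): the scalar tail now applies the activation to the float value before float2int8 instead of patching the already-quantized int8 result. For ReLU the stored byte comes out the same either way, so that part is a consistency cleanup, but for LeakyReLU the two orderings can differ, because the old code multiplied the rounded int8 value by the slope (truncating on the store) while the SIMD paths scale before rounding. The standalone sketch below illustrates that difference; the float2int8 helper here is a stand-in with the usual round-to-nearest-and-saturate behaviour, not a copy of ncnn's implementation, and the sample numbers are arbitrary.

```cpp
#include <cmath>
#include <cstdio>

// Stand-in for an int8 quantizer: round to nearest, saturate to [-127, 127].
static signed char float2int8(float v)
{
    int i = static_cast<int>(roundf(v));
    if (i > 127) return 127;
    if (i < -127) return -127;
    return (signed char)i;
}

int main()
{
    const float scale = 0.5f; // combined requantize scale
    const float slope = 0.1f; // LeakyReLU negative slope
    const int x = -15;        // one int32 accumulator value

    // Old scalar tail: quantize first, then patch the stored int8.
    signed char old_v = float2int8(x * scale);           // round(-7.5) -> -8
    if (old_v < 0) old_v = (signed char)(old_v * slope); // -8 * 0.1 = -0.8 -> truncates to 0

    // New scalar tail: apply LeakyReLU in float, then quantize once.
    float v = x * scale;   // -7.5
    if (v < 0) v *= slope; // -0.75
    signed char new_v = float2int8(v); // round(-0.75) -> -1

    printf("old=%d new=%d\n", old_v, new_v); // old=0 new=-1
    return 0;
}
```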