diff --git a/src/layer/arm/convolution_arm.cpp b/src/layer/arm/convolution_arm.cpp
index 4d379f65eae..b22b3bb08fc 100644
--- a/src/layer/arm/convolution_arm.cpp
+++ b/src/layer/arm/convolution_arm.cpp
@@ -1376,15 +1376,22 @@ int Convolution_arm::forward_int8_arm(const Mat& bottom_blob, Mat& top_blob, con
 #if __ARM_NEON
     if (opt.use_packing_layout)
     {
-#if NCNN_ARM82
-        if (ncnn::cpu_support_arm_asimdhp() && opt.use_fp16_arithmetic)
+        if (use_int8_requantize)
         {
-            out_elempack_int32 = num_output % 8 == 0 ? 8 : num_output % 4 == 0 ? 4 : 1;
+            out_elempack_int32 = num_output % 8 == 0 ? 8 : 1;
         }
         else
-#endif // NCNN_ARM82
         {
-            out_elempack_int32 = num_output % 4 == 0 ? 4 : 1;
+#if NCNN_ARM82
+            if (ncnn::cpu_support_arm_asimdhp() && opt.use_fp16_arithmetic)
+            {
+                out_elempack_int32 = num_output % 8 == 0 ? 8 : num_output % 4 == 0 ? 4 : 1;
+            }
+            else
+#endif // NCNN_ARM82
+            {
+                out_elempack_int32 = num_output % 4 == 0 ? 4 : 1;
+            }
         }
     }
 #endif // __ARM_NEON
diff --git a/src/layer/arm/requantize_arm.cpp b/src/layer/arm/requantize_arm.cpp
index d5fe92428b3..d610d512b57 100644
--- a/src/layer/arm/requantize_arm.cpp
+++ b/src/layer/arm/requantize_arm.cpp
@@ -120,8 +120,8 @@ static void requantize_relu(const int* intptr, signed char* ptr, const Mat& scal
     for (; i < size; i++)
     {
         float v = *intptr * scale;
+        if (v < 0) v = 0;
         *ptr = float2int8(v);
-        if (*ptr < 0) *ptr = 0;
         intptr++;
         ptr++;
     }
@@ -190,8 +190,8 @@ static void requantize_relu(const int* intptr, signed char* ptr, const Mat& scal
     for (; i < size; i++)
     {
         float v = *intptr * scale + bias;
+        if (v < 0) v = 0;
         *ptr = float2int8(v);
-        if (*ptr < 0) *ptr = 0;
         intptr++;
         ptr++;
     }
@@ -288,8 +288,8 @@ static void requantize_leakyrelu(const int* intptr, signed char* ptr, const Mat&
     for (; i < size; i++)
     {
         float v = *intptr * scale;
+        if (v < 0) v *= slope;
         *ptr = float2int8(v);
-        if (*ptr < 0) *ptr *= slope;
         intptr++;
         ptr++;
     }
@@ -358,8 +358,8 @@ static void requantize_leakyrelu(const int* intptr, signed char* ptr, const Mat&
     for (; i < size; i++)
     {
         float v = *intptr * scale + bias;
+        if (v < 0) v *= slope;
         *ptr = float2int8(v);
-        if (*ptr < 0) *ptr *= slope;
         intptr++;
         ptr++;
     }
diff --git a/src/layer/loongarch/requantize_loongarch.cpp b/src/layer/loongarch/requantize_loongarch.cpp
index 9b7f2130cf4..0085bf71785 100644
--- a/src/layer/loongarch/requantize_loongarch.cpp
+++ b/src/layer/loongarch/requantize_loongarch.cpp
@@ -120,8 +120,8 @@ static void requantize_relu(const int* intptr, signed char* ptr, const Mat& scal
     for (; i < size; i++)
     {
         float v = *intptr * scale;
+        if (v < 0) v = 0;
         *ptr = float2int8(v);
-        if (*ptr < 0) *ptr = 0;
         intptr++;
         ptr++;
     }
@@ -182,8 +182,8 @@ static void requantize_relu(const int* intptr, signed char* ptr, const Mat& scal
     for (; i < size; i++)
     {
         float v = *intptr * scale + bias;
+        if (v < 0) v = 0;
         *ptr = float2int8(v);
-        if (*ptr < 0) *ptr = 0;
         intptr++;
         ptr++;
     }
@@ -281,8 +281,8 @@ static void requantize_leakyrelu(const int* intptr, signed char* ptr, const Mat&
     for (; i < size; i++)
     {
         float v = *intptr * scale;
+        if (v < 0) v *= slope;
         *ptr = float2int8(v);
-        if (*ptr < 0) *ptr *= slope;
         intptr++;
         ptr++;
     }
@@ -343,8 +343,8 @@ static void requantize_leakyrelu(const int* intptr, signed char* ptr, const Mat&
     for (; i < size; i++)
     {
         float v = *intptr * scale + bias;
+        if (v < 0) v *= slope;
         *ptr = float2int8(v);
-        if (*ptr < 0) *ptr *= slope;
         intptr++;
         ptr++;
     }
diff --git a/src/layer/mips/requantize_mips.cpp b/src/layer/mips/requantize_mips.cpp
index 9c502362d3a..374229fc54b 100644
--- a/src/layer/mips/requantize_mips.cpp
+++ b/src/layer/mips/requantize_mips.cpp
@@ -120,8 +120,8 @@ static void requantize_relu(const int* intptr, signed char* ptr, const Mat& scal
     for (; i < size; i++)
     {
         float v = *intptr * scale;
+        if (v < 0) v = 0;
         *ptr = float2int8(v);
-        if (*ptr < 0) *ptr = 0;
         intptr++;
         ptr++;
     }
@@ -182,8 +182,8 @@ static void requantize_relu(const int* intptr, signed char* ptr, const Mat& scal
     for (; i < size; i++)
    {
         float v = *intptr * scale + bias;
+        if (v < 0) v = 0;
         *ptr = float2int8(v);
-        if (*ptr < 0) *ptr = 0;
         intptr++;
         ptr++;
     }
@@ -281,8 +281,8 @@ static void requantize_leakyrelu(const int* intptr, signed char* ptr, const Mat&
     for (; i < size; i++)
     {
         float v = *intptr * scale;
+        if (v < 0) v *= slope;
         *ptr = float2int8(v);
-        if (*ptr < 0) *ptr *= slope;
         intptr++;
         ptr++;
     }
@@ -343,8 +343,8 @@ static void requantize_leakyrelu(const int* intptr, signed char* ptr, const Mat&
     for (; i < size; i++)
     {
         float v = *intptr * scale + bias;
+        if (v < 0) v *= slope;
         *ptr = float2int8(v);
-        if (*ptr < 0) *ptr *= slope;
         intptr++;
         ptr++;
     }
diff --git a/src/layer/x86/convolution_x86.cpp b/src/layer/x86/convolution_x86.cpp
index 528bd5fae75..dfb7f6bdd44 100644
--- a/src/layer/x86/convolution_x86.cpp
+++ b/src/layer/x86/convolution_x86.cpp
@@ -995,7 +995,11 @@ int Convolution_x86::forward_int8_x86(const Mat& bottom_blob, Mat& top_blob, con
     {
         if (use_int8_requantize)
         {
+#if __AVX__
             out_elempack_int32 = num_output % 8 == 0 ? 8 : 1;
+#else
+            out_elempack_int32 = num_output % 8 == 0 ? 4 : 1;
+#endif
         }
         else
         {
diff --git a/src/layer/x86/requantize_x86.cpp b/src/layer/x86/requantize_x86.cpp
index 6b64f86967d..a2e7385dac1 100644
--- a/src/layer/x86/requantize_x86.cpp
+++ b/src/layer/x86/requantize_x86.cpp
@@ -330,6 +330,82 @@ static void requantize(const int* intptr, signed char* ptr, const Mat& scale_in_
     }
 }
 
+#if __SSE2__
+#if !__AVX__
+static void requantize_pack4to8(const int* intptr0, const int* intptr1, signed char* ptr, const Mat& scale_in_data, const Mat& bias_data, const Mat& scale_out_data, int activation_type, const Mat& activation_params, int elemcount)
+{
+    const int scale_in_data_size = scale_in_data.w;
+    const int bias_data_size = bias_data.w;
+    const int scale_out_data_size = scale_out_data.w;
+
+    // NCNN_LOGE("requantize_pack4to8 %d %d %d %d", scale_in_data_size, bias_data_size, scale_out_data_size, elemcount);
+
+    __m128 _scale_in0 = _mm_set1_ps(scale_in_data[0]);
+    __m128 _scale_in1 = _scale_in0;
+    if (scale_in_data_size > 1)
+    {
+        _scale_in0 = _mm_loadu_ps((const float*)scale_in_data);
+        _scale_in1 = _mm_loadu_ps((const float*)scale_in_data + 4);
+    }
+
+    __m128 _scale_out0 = _mm_set1_ps(scale_out_data[0]);
+    __m128 _scale_out1 = _scale_out0;
+    if (scale_out_data_size > 1)
+    {
+        _scale_out0 = _mm_loadu_ps((const float*)scale_out_data);
+        _scale_out1 = _mm_loadu_ps((const float*)scale_out_data + 4);
+    }
+
+    if (bias_data_size == 0)
+    {
+        int i = 0;
+        for (; i < elemcount; i++)
+        {
+            __m128 _v0 = _mm_cvtepi32_ps(_mm_loadu_si128((const __m128i*)intptr0));
+            __m128 _v1 = _mm_cvtepi32_ps(_mm_loadu_si128((const __m128i*)intptr1));
+            _v0 = _mm_mul_ps(_v0, _scale_in0);
+            _v1 = _mm_mul_ps(_v1, _scale_in1);
+            _v0 = activation_sse(_v0, activation_type, activation_params);
+            _v1 = activation_sse(_v1, activation_type, activation_params);
+            _v0 = _mm_mul_ps(_v0, _scale_out0);
+            _v1 = _mm_mul_ps(_v1, _scale_out1);
+            *(int64_t*)ptr = float2int8_sse(_v0, _v1);
+            intptr0 += 4;
+            intptr1 += 4;
+            ptr += 8;
+        }
+    }
+    else
+    {
+        __m128 _bias0 = _mm_set1_ps(bias_data[0]);
+        __m128 _bias1 = _bias0;
+        if (bias_data_size > 1)
+        {
+            _bias0 = _mm_loadu_ps((const float*)bias_data);
+            _bias1 = _mm_loadu_ps((const float*)bias_data + 4);
+        }
+
+        int i = 0;
+        for (; i < elemcount; i++)
+        {
+            __m128 _v0 = _mm_cvtepi32_ps(_mm_loadu_si128((const __m128i*)intptr0));
+            __m128 _v1 = _mm_cvtepi32_ps(_mm_loadu_si128((const __m128i*)intptr1));
+            _v0 = _mm_comp_fmadd_ps(_v0, _scale_in0, _bias0);
+            _v1 = _mm_comp_fmadd_ps(_v1, _scale_in1, _bias1);
+            _v0 = activation_sse(_v0, activation_type, activation_params);
+            _v1 = activation_sse(_v1, activation_type, activation_params);
+            _v0 = _mm_mul_ps(_v0, _scale_out0);
+            _v1 = _mm_mul_ps(_v1, _scale_out1);
+            *(int64_t*)ptr = float2int8_sse(_v0, _v1);
+            intptr0 += 4;
+            intptr1 += 4;
+            ptr += 8;
+        }
+    }
+}
+#endif // !__AVX__
+#endif // __SSE2__
+
 int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
 {
     const int dims = bottom_blob.dims;
@@ -337,11 +413,20 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
     const int h = bottom_blob.h;
     const int channels = bottom_blob.c;
     const int elempack = bottom_blob.elempack;
-    const size_t out_elemsize = elempack * 1u;
 
     if (dims == 1)
     {
-        top_blob.create(w, out_elemsize, elempack, opt.blob_allocator);
+        int out_elempack = 1;
+#if __SSE2__
+        if (opt.use_packing_layout)
+        {
+            out_elempack = w * elempack % 8 == 0 ? 8 : 1;
+        }
+#endif
+        const int outw = w * elempack / out_elempack;
+        const size_t out_elemsize = out_elempack * 1u;
+
+        top_blob.create(outw, out_elemsize, out_elempack, opt.blob_allocator);
         if (top_blob.empty())
             return -100;
 
@@ -368,41 +453,107 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
 
     if (dims == 2)
     {
-        top_blob.create(w, h, out_elemsize, elempack, opt.blob_allocator);
+        int out_elempack = 1;
+#if __SSE2__
+        if (opt.use_packing_layout)
+        {
+            out_elempack = h * elempack % 8 == 0 ? 8 : 1;
+        }
+#endif
+        const int outh = h * elempack / out_elempack;
+        const size_t out_elemsize = out_elempack * 1u;
+
+        top_blob.create(w, outh, out_elemsize, out_elempack, opt.blob_allocator);
         if (top_blob.empty())
             return -100;
 
-        #pragma omp parallel for num_threads(opt.num_threads)
-        for (int i = 0; i < h; i++)
+#if __SSE2__
+#if !__AVX__
+        if (elempack == 4 && out_elempack == 8)
         {
-            const int* intptr = bottom_blob.row<const int>(i);
-            signed char* ptr = top_blob.row<signed char>(i);
+            #pragma omp parallel for num_threads(opt.num_threads)
+            for (int i = 0; i < outh; i++)
+            {
+                const int* intptr0 = bottom_blob.row<const int>(i * 2);
+                const int* intptr1 = bottom_blob.row<const int>(i * 2 + 1);
+                signed char* ptr = top_blob.row<signed char>(i);
 
-            const Mat scale_in_data_i = scale_in_data_size > 1 ? scale_in_data.range(i * elempack, elempack) : scale_in_data;
-            const Mat bias_data_i = bias_data_size > 1 ? bias_data.range(i * elempack, elempack) : bias_data;
-            const Mat scale_out_data_i = scale_out_data_size > 1 ? scale_out_data.range(i * elempack, elempack) : scale_out_data;
+                const Mat scale_in_data_i = scale_in_data_size > 1 ? scale_in_data.range(i * out_elempack, out_elempack) : scale_in_data;
+                const Mat bias_data_i = bias_data_size > 1 ? bias_data.range(i * out_elempack, out_elempack) : bias_data;
+                const Mat scale_out_data_i = scale_out_data_size > 1 ? scale_out_data.range(i * out_elempack, out_elempack) : scale_out_data;
 
-            requantize(intptr, ptr, scale_in_data_i, bias_data_i, scale_out_data_i, activation_type, activation_params, w, elempack);
+                requantize_pack4to8(intptr0, intptr1, ptr, scale_in_data_i, bias_data_i, scale_out_data_i, activation_type, activation_params, w);
+            }
+        }
+#endif // !__AVX__
+#endif // __SSE2__
+        if (elempack == out_elempack)
+        {
+            #pragma omp parallel for num_threads(opt.num_threads)
+            for (int i = 0; i < h; i++)
+            {
+                const int* intptr = bottom_blob.row<const int>(i);
+                signed char* ptr = top_blob.row<signed char>(i);
+
+                const Mat scale_in_data_i = scale_in_data_size > 1 ? scale_in_data.range(i * elempack, elempack) : scale_in_data;
+                const Mat bias_data_i = bias_data_size > 1 ? bias_data.range(i * elempack, elempack) : bias_data;
+                const Mat scale_out_data_i = scale_out_data_size > 1 ? scale_out_data.range(i * elempack, elempack) : scale_out_data;
+
+                requantize(intptr, ptr, scale_in_data_i, bias_data_i, scale_out_data_i, activation_type, activation_params, w, elempack);
+            }
         }
     }
 
     if (dims == 3)
     {
-        top_blob.create(w, h, channels, out_elemsize, elempack, opt.blob_allocator);
+        int out_elempack = 1;
+#if __SSE2__
+        if (opt.use_packing_layout)
+        {
+            out_elempack = channels * elempack % 8 == 0 ? 8 : 1;
+        }
+#endif
+        const int outc = channels * elempack / out_elempack;
+        const size_t out_elemsize = out_elempack * 1u;
+
+        top_blob.create(w, h, outc, out_elemsize, out_elempack, opt.blob_allocator);
         if (top_blob.empty())
             return -100;
 
-        #pragma omp parallel for num_threads(opt.num_threads)
-        for (int q = 0; q < channels; q++)
+#if __SSE2__
+#if !__AVX__
+        if (elempack == 4 && out_elempack == 8)
+        {
+            #pragma omp parallel for num_threads(opt.num_threads)
+            for (int q = 0; q < outc; q++)
+            {
+                const int* intptr0 = bottom_blob.channel(q * 2);
+                const int* intptr1 = bottom_blob.channel(q * 2 + 1);
+                signed char* ptr = top_blob.channel(q);
+
+                const Mat scale_in_data_q = scale_in_data_size > 1 ? scale_in_data.range(q * out_elempack, out_elempack) : scale_in_data;
+                const Mat bias_data_q = bias_data_size > 1 ? bias_data.range(q * out_elempack, out_elempack) : bias_data;
+                const Mat scale_out_data_q = scale_out_data_size > 1 ? scale_out_data.range(q * out_elempack, out_elempack) : scale_out_data;
+
+                requantize_pack4to8(intptr0, intptr1, ptr, scale_in_data_q, bias_data_q, scale_out_data_q, activation_type, activation_params, w * h);
+            }
+        }
+#endif // !__AVX__
+#endif // __SSE2__
+        if (elempack == out_elempack)
         {
-            const int* intptr = bottom_blob.channel(q);
-            signed char* ptr = top_blob.channel(q);
+            #pragma omp parallel for num_threads(opt.num_threads)
+            for (int q = 0; q < channels; q++)
+            {
+                const int* intptr = bottom_blob.channel(q);
+                signed char* ptr = top_blob.channel(q);
 
-            const Mat scale_in_data_q = scale_in_data_size > 1 ? scale_in_data.range(q * elempack, elempack) : scale_in_data;
-            const Mat bias_data_q = bias_data_size > 1 ? bias_data.range(q * elempack, elempack) : bias_data;
-            const Mat scale_out_data_q = scale_out_data_size > 1 ? scale_out_data.range(q * elempack, elempack) : scale_out_data;
+                const Mat scale_in_data_q = scale_in_data_size > 1 ? scale_in_data.range(q * elempack, elempack) : scale_in_data;
+                const Mat bias_data_q = bias_data_size > 1 ? bias_data.range(q * elempack, elempack) : bias_data;
+                const Mat scale_out_data_q = scale_out_data_size > 1 ? scale_out_data.range(q * elempack, elempack) : scale_out_data;
 
-            requantize(intptr, ptr, scale_in_data_q, bias_data_q, scale_out_data_q, activation_type, activation_params, w * h, elempack);
+                requantize(intptr, ptr, scale_in_data_q, bias_data_q, scale_out_data_q, activation_type, activation_params, w * h, elempack);
+            }
         }
     }
 
diff --git a/tests/test_requantize.cpp b/tests/test_requantize.cpp
index 3e4fe148828..e9c37d739f7 100644
--- a/tests/test_requantize.cpp
+++ b/tests/test_requantize.cpp
@@ -14,7 +14,7 @@
 
 #include "testutil.h"
 
-static int test_requantize(const ncnn::Mat& a, int scale_in_data_size, int scale_out_data_size, int bias_data_size, int activation_type, float alpha, float beta)
+static int test_requantize_pack1(const ncnn::Mat& a, int scale_in_data_size, int scale_out_data_size, int bias_data_size, int activation_type, float alpha, float beta)
 {
     ncnn::ParamDict pd;
     pd.set(0, scale_in_data_size);
@@ -36,25 +36,25 @@ static int test_requantize(const ncnn::Mat& a, int scale_in_data_size, int scale
     Randomize(weights[0], 0.0001, 0.001);
     Randomize(weights[1], 10, 100);
 
-    int flag = TEST_LAYER_DISABLE_AUTO_INPUT_CASTING;
+    int flag = TEST_LAYER_DISABLE_AUTO_INPUT_CASTING | TEST_LAYER_DISABLE_AUTO_INPUT_PACKING;
     int ret = test_layer("Requantize", pd, weights, a, 1, 0, flag);
     if (ret != 0)
     {
-        fprintf(stderr, "test_requantize failed a.dims=%d a=(%d %d %d) scale_in_data_size=%d scale_out_data_size=%d bias_data_size=%d act=%d actparams=[%f,%f]\n", a.dims, a.w, a.h, a.c, scale_in_data_size, scale_out_data_size, bias_data_size, activation_type, activation_params[0], activation_params[1]);
+        fprintf(stderr, "test_requantize_pack1 failed a.dims=%d a=(%d %d %d) scale_in_data_size=%d scale_out_data_size=%d bias_data_size=%d act=%d actparams=[%f,%f]\n", a.dims, a.w, a.h, a.c, scale_in_data_size, scale_out_data_size, bias_data_size, activation_type, activation_params[0], activation_params[1]);
     }
 
     return ret;
 }
 
-static int test_requantize(const ncnn::Mat& a, int scale_in_data_size, int scale_out_data_size, int bias_data_size)
+static int test_requantize_pack1(const ncnn::Mat& a, int scale_in_data_size, int scale_out_data_size, int bias_data_size)
 {
     return 0
-           || test_requantize(a, scale_in_data_size, scale_out_data_size, bias_data_size, 0, 0.f, 0.f)
-           || test_requantize(a, scale_in_data_size, scale_out_data_size, bias_data_size, 1, 0.f, 0.f)
-           || test_requantize(a, scale_in_data_size, scale_out_data_size, bias_data_size, 2, RandomFloat(0, 1), 0.f)
-           || test_requantize(a, scale_in_data_size, scale_out_data_size, bias_data_size, 3, RandomFloat(-1, 0), RandomFloat(0, 1))
-           || test_requantize(a, scale_in_data_size, scale_out_data_size, bias_data_size, 4, 0.f, 0.f)
-           || test_requantize(a, scale_in_data_size, scale_out_data_size, bias_data_size, 5, 0.f, 0.f);
+           || test_requantize_pack1(a, scale_in_data_size, scale_out_data_size, bias_data_size, 0, 0.f, 0.f)
+           || test_requantize_pack1(a, scale_in_data_size, scale_out_data_size, bias_data_size, 1, 0.f, 0.f)
+           || test_requantize_pack1(a, scale_in_data_size, scale_out_data_size, bias_data_size, 2, RandomFloat(0, 1), 0.f)
+           || test_requantize_pack1(a, scale_in_data_size, scale_out_data_size, bias_data_size, 3, RandomFloat(-1, 0), RandomFloat(0, 1))
+           || test_requantize_pack1(a, scale_in_data_size, scale_out_data_size, bias_data_size, 4, 0.f, 0.f)
+           || test_requantize_pack1(a, scale_in_data_size, scale_out_data_size, bias_data_size, 5, 0.f, 0.f);
 }
 
 static int test_requantize_pack8(const ncnn::Mat& a, int scale_in_data_size, int scale_out_data_size, int bias_data_size, int activation_type, float alpha, float beta)
@@ -103,94 +103,68 @@ static int test_requantize_pack8(const ncnn::Mat& a, int scale_in_data_size, int
 static int test_requantize_0()
 {
     return 0
-           || test_requantize(RandomIntMat(5, 7, 24), 1, 1, 24)
-           || test_requantize(RandomIntMat(5, 7, 24), 1, 1, 1)
-           || test_requantize(RandomIntMat(5, 7, 24), 1, 1, 0)
-           || test_requantize(RandomIntMat(5, 7, 24), 24, 24, 24)
-           || test_requantize(RandomIntMat(5, 7, 24), 24, 24, 1)
-           || test_requantize(RandomIntMat(5, 7, 24), 24, 24, 0)
-           || test_requantize(RandomIntMat(5, 7, 24), 1, 24, 24)
-           || test_requantize(RandomIntMat(5, 7, 24), 1, 24, 1)
-           || test_requantize(RandomIntMat(5, 7, 24), 1, 24, 0)
-           || test_requantize(RandomIntMat(5, 7, 24), 24, 1, 24)
-           || test_requantize(RandomIntMat(5, 7, 24), 24, 1, 1)
-           || test_requantize(RandomIntMat(5, 7, 24), 24, 1, 0)
-           || test_requantize(RandomIntMat(7, 9, 12), 1, 1, 12)
-           || test_requantize(RandomIntMat(7, 9, 12), 1, 1, 1)
-           || test_requantize(RandomIntMat(7, 9, 12), 1, 1, 0)
-           || test_requantize(RandomIntMat(7, 9, 12), 12, 12, 12)
-           || test_requantize(RandomIntMat(7, 9, 12), 12, 12, 1)
-           || test_requantize(RandomIntMat(7, 9, 12), 12, 12, 0)
-           || test_requantize(RandomIntMat(7, 9, 12), 1, 12, 12)
-           || test_requantize(RandomIntMat(7, 9, 12), 1, 12, 1)
-           || test_requantize(RandomIntMat(7, 9, 12), 1, 12, 0)
-           || test_requantize(RandomIntMat(7, 9, 12), 12, 1, 12)
-           || test_requantize(RandomIntMat(7, 9, 12), 12, 1, 1)
-           || test_requantize(RandomIntMat(7, 9, 12), 12, 1, 0)
-           || test_requantize(RandomIntMat(3, 5, 13), 1, 1, 13)
-           || test_requantize(RandomIntMat(3, 5, 13), 1, 1, 1)
-           || test_requantize(RandomIntMat(3, 5, 13), 1, 1, 0)
-           || test_requantize(RandomIntMat(3, 5, 13), 13, 13, 13)
-           || test_requantize(RandomIntMat(3, 5, 13), 13, 13, 1)
-           || test_requantize(RandomIntMat(3, 5, 13), 13, 13, 0)
-           || test_requantize(RandomIntMat(3, 5, 13), 1, 13, 13)
-           || test_requantize(RandomIntMat(3, 5, 13), 1, 13, 1)
-           || test_requantize(RandomIntMat(3, 5, 13), 1, 13, 0)
-           || test_requantize(RandomIntMat(3, 5, 13), 13, 1, 13)
-           || test_requantize(RandomIntMat(3, 5, 13), 13, 1, 1)
-           || test_requantize(RandomIntMat(3, 5, 13), 13, 1, 0);
+           || test_requantize_pack1(RandomIntMat(7, 9, 12), 1, 1, 12)
+           || test_requantize_pack1(RandomIntMat(7, 9, 12), 1, 1, 1)
+           || test_requantize_pack1(RandomIntMat(7, 9, 12), 1, 1, 0)
+           || test_requantize_pack1(RandomIntMat(7, 9, 12), 12, 12, 12)
+           || test_requantize_pack1(RandomIntMat(7, 9, 12), 12, 12, 1)
+           || test_requantize_pack1(RandomIntMat(7, 9, 12), 12, 12, 0)
+           || test_requantize_pack1(RandomIntMat(7, 9, 12), 1, 12, 12)
+           || test_requantize_pack1(RandomIntMat(7, 9, 12), 1, 12, 1)
+           || test_requantize_pack1(RandomIntMat(7, 9, 12), 1, 12, 0)
+           || test_requantize_pack1(RandomIntMat(7, 9, 12), 12, 1, 12)
+           || test_requantize_pack1(RandomIntMat(7, 9, 12), 12, 1, 1)
+           || test_requantize_pack1(RandomIntMat(7, 9, 12), 12, 1, 0)
+           || test_requantize_pack1(RandomIntMat(3, 5, 13), 1, 1, 13)
+           || test_requantize_pack1(RandomIntMat(3, 5, 13), 1, 1, 1)
+           || test_requantize_pack1(RandomIntMat(3, 5, 13), 1, 1, 0)
+           || test_requantize_pack1(RandomIntMat(3, 5, 13), 13, 13, 13)
+           || test_requantize_pack1(RandomIntMat(3, 5, 13), 13, 13, 1)
+           || test_requantize_pack1(RandomIntMat(3, 5, 13), 13, 13, 0)
+           || test_requantize_pack1(RandomIntMat(3, 5, 13), 1, 13, 13)
+           || test_requantize_pack1(RandomIntMat(3, 5, 13), 1, 13, 1)
+           || test_requantize_pack1(RandomIntMat(3, 5, 13), 1, 13, 0)
+           || test_requantize_pack1(RandomIntMat(3, 5, 13), 13, 1, 13)
+           || test_requantize_pack1(RandomIntMat(3, 5, 13), 13, 1, 1)
+           || test_requantize_pack1(RandomIntMat(3, 5, 13), 13, 1, 0);
 }
 
 static int test_requantize_1()
 {
     return 0
-           || test_requantize(RandomIntMat(15, 24), 1, 1, 24)
-           || test_requantize(RandomIntMat(15, 24), 1, 1, 1)
-           || test_requantize(RandomIntMat(15, 24), 1, 1, 0)
-           || test_requantize(RandomIntMat(15, 24), 24, 24, 24)
-           || test_requantize(RandomIntMat(15, 24), 24, 24, 1)
-           || test_requantize(RandomIntMat(15, 24), 24, 24, 0)
-           || test_requantize(RandomIntMat(15, 24), 1, 24, 24)
-           || test_requantize(RandomIntMat(15, 24), 1, 24, 1)
-           || test_requantize(RandomIntMat(15, 24), 1, 24, 0)
-           || test_requantize(RandomIntMat(15, 24), 24, 1, 24)
-           || test_requantize(RandomIntMat(15, 24), 24, 1, 1)
-           || test_requantize(RandomIntMat(15, 24), 24, 1, 0)
-           || test_requantize(RandomIntMat(17, 12), 1, 1, 12)
-           || test_requantize(RandomIntMat(17, 12), 1, 1, 1)
-           || test_requantize(RandomIntMat(17, 12), 1, 1, 0)
-           || test_requantize(RandomIntMat(17, 12), 12, 12, 12)
-           || test_requantize(RandomIntMat(17, 12), 12, 12, 1)
-           || test_requantize(RandomIntMat(17, 12), 12, 12, 0)
-           || test_requantize(RandomIntMat(17, 12), 1, 12, 12)
-           || test_requantize(RandomIntMat(17, 12), 1, 12, 1)
-           || test_requantize(RandomIntMat(17, 12), 1, 12, 0)
-           || test_requantize(RandomIntMat(17, 12), 12, 1, 12)
-           || test_requantize(RandomIntMat(17, 12), 12, 1, 1)
-           || test_requantize(RandomIntMat(17, 12), 12, 1, 0)
-           || test_requantize(RandomIntMat(19, 15), 1, 1, 15)
-           || test_requantize(RandomIntMat(19, 15), 1, 1, 1)
-           || test_requantize(RandomIntMat(19, 15), 1, 1, 0)
-           || test_requantize(RandomIntMat(19, 15), 15, 15, 15)
-           || test_requantize(RandomIntMat(19, 15), 15, 15, 1)
-           || test_requantize(RandomIntMat(19, 15), 15, 15, 0)
-           || test_requantize(RandomIntMat(19, 15), 1, 15, 15)
-           || test_requantize(RandomIntMat(19, 15), 1, 15, 1)
-           || test_requantize(RandomIntMat(19, 15), 1, 15, 0)
-           || test_requantize(RandomIntMat(19, 15), 15, 1, 15)
-           || test_requantize(RandomIntMat(19, 15), 15, 1, 1)
-           || test_requantize(RandomIntMat(19, 15), 15, 1, 0);
+           || test_requantize_pack1(RandomIntMat(17, 12), 1, 1, 12)
+           || test_requantize_pack1(RandomIntMat(17, 12), 1, 1, 1)
+           || test_requantize_pack1(RandomIntMat(17, 12), 1, 1, 0)
+           || test_requantize_pack1(RandomIntMat(17, 12), 12, 12, 12)
+           || test_requantize_pack1(RandomIntMat(17, 12), 12, 12, 1)
+           || test_requantize_pack1(RandomIntMat(17, 12), 12, 12, 0)
+           || test_requantize_pack1(RandomIntMat(17, 12), 1, 12, 12)
+           || test_requantize_pack1(RandomIntMat(17, 12), 1, 12, 1)
+           || test_requantize_pack1(RandomIntMat(17, 12), 1, 12, 0)
+           || test_requantize_pack1(RandomIntMat(17, 12), 12, 1, 12)
+           || test_requantize_pack1(RandomIntMat(17, 12), 12, 1, 1)
+           || test_requantize_pack1(RandomIntMat(17, 12), 12, 1, 0)
+           || test_requantize_pack1(RandomIntMat(19, 15), 1, 1, 15)
+           || test_requantize_pack1(RandomIntMat(19, 15), 1, 1, 1)
+           || test_requantize_pack1(RandomIntMat(19, 15), 1, 1, 0)
+           || test_requantize_pack1(RandomIntMat(19, 15), 15, 15, 15)
+           || test_requantize_pack1(RandomIntMat(19, 15), 15, 15, 1)
+           || test_requantize_pack1(RandomIntMat(19, 15), 15, 15, 0)
+           || test_requantize_pack1(RandomIntMat(19, 15), 1, 15, 15)
+           || test_requantize_pack1(RandomIntMat(19, 15), 1, 15, 1)
+           || test_requantize_pack1(RandomIntMat(19, 15), 1, 15, 0)
+           || test_requantize_pack1(RandomIntMat(19, 15), 15, 1, 15)
+           || test_requantize_pack1(RandomIntMat(19, 15), 15, 1, 1)
+           || test_requantize_pack1(RandomIntMat(19, 15), 15, 1, 0);
 }
 
 static int test_requantize_2()
 {
     return 0
-           || test_requantize(RandomIntMat(128), 1, 1, 1)
-           || test_requantize(RandomIntMat(128), 1, 1, 0)
-           || test_requantize(RandomIntMat(124), 1, 1, 1)
-           || test_requantize(RandomIntMat(124), 1, 1, 0)
-           || test_requantize(RandomIntMat(127), 1, 1, 1)
-           || test_requantize(RandomIntMat(127), 1, 1, 0);
+           || test_requantize_pack1(RandomIntMat(124), 1, 1, 1)
+           || test_requantize_pack1(RandomIntMat(124), 1, 1, 0)
+           || test_requantize_pack1(RandomIntMat(127), 1, 1, 1)
+           || test_requantize_pack1(RandomIntMat(127), 1, 1, 0);
 }
 
 static int test_requantize_3()
diff --git a/tests/test_requantize_oom.cpp b/tests/test_requantize_oom.cpp
new file mode 100644
index 00000000000..3b39ba16971
--- /dev/null
+++ b/tests/test_requantize_oom.cpp
@@ -0,0 +1,139 @@
+// Tencent is pleased to support the open source community by making ncnn available.
+//
+// Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved.
+//
+// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// https://opensource.org/licenses/BSD-3-Clause
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#include "testutil.h"
+
+static int test_requantize_pack1_oom(const ncnn::Mat& a, int scale_in_data_size, int scale_out_data_size, int bias_data_size, int activation_type, float alpha, float beta)
+{
+    ncnn::ParamDict pd;
+    pd.set(0, scale_in_data_size);
+    pd.set(1, scale_out_data_size);
+    pd.set(2, bias_data_size);
+
+    ncnn::Mat activation_params(2);
+    activation_params[0] = alpha;
+    activation_params[1] = beta;
+    pd.set(3, activation_type);
+    pd.set(4, activation_params);
+
+    std::vector<ncnn::Mat> weights(bias_data_size ? 3 : 2);
+    weights[0] = RandomMat(scale_in_data_size);
+    weights[1] = RandomMat(scale_out_data_size);
+    if (bias_data_size)
+        weights[2] = RandomMat(bias_data_size);
+
+    Randomize(weights[0], 0.0001, 0.001);
+    Randomize(weights[1], 10, 100);
+
+    int flag = TEST_LAYER_DISABLE_AUTO_INPUT_CASTING | TEST_LAYER_DISABLE_AUTO_INPUT_PACKING;
+    int ret = test_layer_oom("Requantize", pd, weights, a, flag);
+    if (ret != 0)
+    {
+        fprintf(stderr, "test_requantize_pack1_oom failed a.dims=%d a=(%d %d %d) scale_in_data_size=%d scale_out_data_size=%d bias_data_size=%d act=%d actparams=[%f,%f]\n", a.dims, a.w, a.h, a.c, scale_in_data_size, scale_out_data_size, bias_data_size, activation_type, activation_params[0], activation_params[1]);
+    }
+
+    return ret;
+}
+
+static int test_requantize_pack1_oom(const ncnn::Mat& a, int scale_in_data_size, int scale_out_data_size, int bias_data_size)
+{
+    return 0
+           || test_requantize_pack1_oom(a, scale_in_data_size, scale_out_data_size, bias_data_size, 0, 0.f, 0.f)
+           || test_requantize_pack1_oom(a, scale_in_data_size, scale_out_data_size, bias_data_size, 1, 0.f, 0.f)
+           || test_requantize_pack1_oom(a, scale_in_data_size, scale_out_data_size, bias_data_size, 2, RandomFloat(0, 1), 0.f)
+           || test_requantize_pack1_oom(a, scale_in_data_size, scale_out_data_size, bias_data_size, 3, RandomFloat(-1, 0), RandomFloat(0, 1))
+           || test_requantize_pack1_oom(a, scale_in_data_size, scale_out_data_size, bias_data_size, 4, 0.f, 0.f)
+           || test_requantize_pack1_oom(a, scale_in_data_size, scale_out_data_size, bias_data_size, 5, 0.f, 0.f);
+}
+
+static int test_requantize_pack8_oom(const ncnn::Mat& a, int scale_in_data_size, int scale_out_data_size, int bias_data_size, int activation_type, float alpha, float beta)
+{
+    ncnn::ParamDict pd;
+    pd.set(0, scale_in_data_size);
+    pd.set(1, scale_out_data_size);
+    pd.set(2, bias_data_size);
+
+    ncnn::Mat activation_params(2);
+    activation_params[0] = alpha;
+    activation_params[1] = beta;
+    pd.set(3, activation_type);
+    pd.set(4, activation_params);
+
+    std::vector<ncnn::Mat> weights(bias_data_size ? 3 : 2);
+    weights[0] = RandomMat(scale_in_data_size);
+    weights[1] = RandomMat(scale_out_data_size);
+    if (bias_data_size)
+        weights[2] = RandomMat(bias_data_size);
+
+    Randomize(weights[0], 0.0001, 0.001);
+    Randomize(weights[1], 10, 100);
+
+    int flag = TEST_LAYER_DISABLE_AUTO_INPUT_CASTING | TEST_LAYER_ENABLE_FORCE_INPUT_PACK8;
+    int ret = test_layer_oom("Requantize", pd, weights, a, flag);
+    if (ret != 0)
+    {
+        fprintf(stderr, "test_requantize_pack8_oom failed a.dims=%d a=(%d %d %d) scale_in_data_size=%d scale_out_data_size=%d bias_data_size=%d act=%d actparams=[%f,%f]\n", a.dims, a.w, a.h, a.c, scale_in_data_size, scale_out_data_size, bias_data_size, activation_type, activation_params[0], activation_params[1]);
+    }
+
+    return ret;
+}
+
+static int test_requantize_pack8_oom(const ncnn::Mat& a, int scale_in_data_size, int scale_out_data_size, int bias_data_size)
+{
+    return 0
+           || test_requantize_pack8_oom(a, scale_in_data_size, scale_out_data_size, bias_data_size, 0, 0.f, 0.f)
+           || test_requantize_pack8_oom(a, scale_in_data_size, scale_out_data_size, bias_data_size, 1, 0.f, 0.f)
+           || test_requantize_pack8_oom(a, scale_in_data_size, scale_out_data_size, bias_data_size, 2, RandomFloat(0, 1), 0.f)
+           || test_requantize_pack8_oom(a, scale_in_data_size, scale_out_data_size, bias_data_size, 3, RandomFloat(-1, 0), RandomFloat(0, 1))
+           || test_requantize_pack8_oom(a, scale_in_data_size, scale_out_data_size, bias_data_size, 4, 0.f, 0.f)
+           || test_requantize_pack8_oom(a, scale_in_data_size, scale_out_data_size, bias_data_size, 5, 0.f, 0.f);
+}
+
+static int test_requantize_0()
+{
+    return 0
+           || test_requantize_pack1_oom(RandomIntMat(7, 9, 12), 12, 12, 12)
+           || test_requantize_pack1_oom(RandomIntMat(3, 5, 13), 13, 13, 13);
+}
+
+static int test_requantize_1()
+{
+    return 0
+           || test_requantize_pack1_oom(RandomIntMat(17, 12), 12, 12, 12)
+           || test_requantize_pack1_oom(RandomIntMat(19, 15), 15, 15, 15);
+}
+
+static int test_requantize_2()
+{
+    return test_requantize_pack1_oom(RandomIntMat(124), 1, 1, 1);
+}
+
+static int test_requantize_3()
+{
+    return 0
+           || test_requantize_pack8_oom(RandomIntMat(5, 7, 24), 24, 24, 24)
+           || test_requantize_pack8_oom(RandomIntMat(15, 24), 24, 24, 24)
+           || test_requantize_pack8_oom(RandomIntMat(128), 1, 1, 1);
+}
+
+int main()
+{
+    SRAND(7767517);
+
+    return 0
+           || test_requantize_0()
+           || test_requantize_1()
+           || test_requantize_2()
+           || test_requantize_3();
+}
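A note on the requantize_relu / requantize_leakyrelu edits above (arm, loongarch, mips): the scalar tail now applies the activation to the float value before float2int8 instead of patching the already-quantized int8 result. For ReLU the stored byte comes out the same either way, so that part is a consistency cleanup, but for LeakyReLU the two orderings can differ, because the old code multiplied the rounded int8 value by the slope (truncating on the store) while the SIMD paths scale before rounding. The standalone sketch below illustrates that difference; the float2int8 helper here is a stand-in with the usual round-to-nearest-and-saturate behaviour, not a copy of ncnn's implementation, and the sample numbers are arbitrary.

```cpp
#include <cmath>
#include <cstdio>

// Stand-in for an int8 quantizer: round to nearest, saturate to [-127, 127].
static signed char float2int8(float v)
{
    int i = static_cast<int>(roundf(v));
    if (i > 127) return 127;
    if (i < -127) return -127;
    return (signed char)i;
}

int main()
{
    const float scale = 0.5f; // combined requantize scale
    const float slope = 0.1f; // LeakyReLU negative slope
    const int x = -15;        // one int32 accumulator value

    // Old scalar tail: quantize first, then patch the stored int8.
    signed char old_v = float2int8(x * scale);           // round(-7.5) -> -8
    if (old_v < 0) old_v = (signed char)(old_v * slope); // -8 * 0.1 = -0.8 -> truncates to 0

    // New scalar tail: apply LeakyReLU in float, then quantize once.
    float v = x * scale;   // -7.5
    if (v < 0) v *= slope; // -0.75
    signed char new_v = float2int8(v); // round(-0.75) -> -1

    printf("old=%d new=%d\n", old_v, new_v); // old=0 new=-1
    return 0;
}
```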