diff --git a/Include/arm_nnsupportfunctions.h b/Include/arm_nnsupportfunctions.h index 1e10be75..cd66d6cb 100644 --- a/Include/arm_nnsupportfunctions.h +++ b/Include/arm_nnsupportfunctions.h @@ -21,8 +21,8 @@ * Title: arm_nnsupportfunctions.h * Description: Public header file of support functions for CMSIS NN Library * - * $Date: 04 November 2024 - * $Revision: V.22.5.0 + * $Date: 08 November 2024 + * $Revision: V.22.6.0 * * Target : Arm(R) M-Profile Architecture * -------------------------------------------------------------------- */ @@ -72,8 +72,17 @@ extern "C" { // to not loose precision. #define MAX_COL_COUNT (512) -// By default this will have not effect. During compilation this may be set to __restrict, which may be beneficial for -// performance. See README.md for more intformation. +// Threshold for number of output channels that decide whether to convert a depthwise conv to a +// regular conv operation when number of input channels is one. +// Only applicable for processors with MVE extension. +#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050) + #define CONVERT_DW_CONV_WITH_ONE_INPUT_CH_AND_OUTPUT_CH_ABOVE_THRESHOLD (8) +#else + #define CONVERT_DW_CONV_WITH_ONE_INPUT_CH_AND_OUTPUT_CH_ABOVE_THRESHOLD (1) +#endif + +// By default this will have no effect. During compilation this may be set to __restrict, +// which may be beneficial for performance. See README.md for more information. 
#ifndef OPTIONAL_RESTRICT_KEYWORD #define OPTIONAL_RESTRICT_KEYWORD #endif diff --git a/Source/ConvolutionFunctions/arm_depthwise_conv_get_buffer_sizes_s8.c b/Source/ConvolutionFunctions/arm_depthwise_conv_get_buffer_sizes_s8.c index b477982e..6190f5fe 100644 --- a/Source/ConvolutionFunctions/arm_depthwise_conv_get_buffer_sizes_s8.c +++ b/Source/ConvolutionFunctions/arm_depthwise_conv_get_buffer_sizes_s8.c @@ -21,8 +21,8 @@ * Title: arm_depthwise_conv_get_buffer_sizes_s8.c * Description: Collection of get buffer size functions for the various s8 convolution layer functions. * - * $Date: 17 April 2024 - * $Revision: V.1.2.0 + * $Date: 1 November 2024 + * $Revision: V.1.3.0 * * Target : Arm(R) M-Profile Architecture * @@ -40,6 +40,27 @@ * @{ */ +__STATIC_INLINE int32_t +arm_deptwise_conv_s8_one_in_ch_get_buffer_size_mve(const cmsis_nn_dw_conv_params *dw_conv_params, + const cmsis_nn_dims *input_dims, + const cmsis_nn_dims *filter_dims, + const cmsis_nn_dims *output_dims) +{ + const cmsis_nn_dims filter_conv_dims = {filter_dims->c, filter_dims->h, filter_dims->w, filter_dims->n}; + const cmsis_nn_conv_params conv_params = {dw_conv_params->input_offset, + dw_conv_params->output_offset, + dw_conv_params->stride, + dw_conv_params->padding, + dw_conv_params->dilation, + dw_conv_params->activation}; + + int32_t size = + arm_convolve_wrapper_s8_get_buffer_size_mve(&conv_params, input_dims, &filter_conv_dims, output_dims); + size += filter_dims->c * filter_dims->h * filter_dims->w * filter_dims->n; + + return size; +} + int32_t arm_depthwise_conv_s8_opt_get_buffer_size_mve(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims) { (void)input_dims; @@ -71,6 +92,13 @@ int32_t arm_depthwise_conv_wrapper_s8_get_buffer_size(const cmsis_nn_dw_conv_par { int32_t size = 0; +#if defined(ARM_MATH_MVEI) + if (input_dims->c == 1 && output_dims->c > CONVERT_DW_CONV_WITH_ONE_INPUT_CH_AND_OUTPUT_CH_ABOVE_THRESHOLD) + { + return 
arm_deptwise_conv_s8_one_in_ch_get_buffer_size_mve(dw_conv_params, input_dims, filter_dims, output_dims); + } +#endif + if (input_dims->c == output_dims->c && input_dims->n == 1 && dw_conv_params->dilation.w == 1 && dw_conv_params->dilation.h == 1) { @@ -121,6 +149,19 @@ int32_t arm_depthwise_conv_wrapper_s8_get_buffer_size_mve(const cmsis_nn_dw_conv size = arm_depthwise_conv_s8_opt_get_buffer_size_mve(input_dims, filter_dims); } + if (input_dims->c == 1 && output_dims->c > CONVERT_DW_CONV_WITH_ONE_INPUT_CH_AND_OUTPUT_CH_ABOVE_THRESHOLD) + { + const int32_t to_conv_size = + arm_deptwise_conv_s8_one_in_ch_get_buffer_size_mve(dw_conv_params, input_dims, filter_dims, output_dims); + + /* Special case since this is compiler dependent. + Note it is recommended to use arm_depthwise_conv_wrapper_s8_get_buffer_size() instead. */ + if (to_conv_size > size) + { + return to_conv_size; + } + } + return size; } diff --git a/Source/ConvolutionFunctions/arm_depthwise_conv_wrapper_s8.c b/Source/ConvolutionFunctions/arm_depthwise_conv_wrapper_s8.c index 9f479ecc..3913bdcc 100644 --- a/Source/ConvolutionFunctions/arm_depthwise_conv_wrapper_s8.c +++ b/Source/ConvolutionFunctions/arm_depthwise_conv_wrapper_s8.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright 2010-2023 Arm Limited and/or its affiliates + * SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates * * SPDX-License-Identifier: Apache-2.0 * @@ -22,14 +22,15 @@ * Description: Wrapper API to select appropriate depthwise conv API based * on dimensions. 
* - * $Date: 13 January 2023 - * $Revision: V.2.1.0 + * $Date: 04 November 2024 + * $Revision: V.2.2.0 * * Target : Arm(R) M-Profile Architecture * * -------------------------------------------------------------------- */ #include "arm_nnfunctions.h" +#include "arm_nnsupportfunctions.h" /** * @ingroup Public @@ -40,6 +41,51 @@ * @{ */ +#if defined(ARM_MATH_MVEI) +static arm_cmsis_nn_status arm_depthwise_conv_to_conv_s8(const cmsis_nn_context *ctx, + const cmsis_nn_dw_conv_params *dw_conv_params, + const cmsis_nn_per_channel_quant_params *quant_params, + const cmsis_nn_dims *input_dims, + const int8_t *input, + const cmsis_nn_dims *filter_dims, + const int8_t *filter, + const cmsis_nn_dims *bias_dims, + const int32_t *bias, + const cmsis_nn_dims *output_dims, + int8_t *output) +{ + const cmsis_nn_conv_params conv_params = {dw_conv_params->input_offset, + dw_conv_params->output_offset, + dw_conv_params->stride, + dw_conv_params->padding, + dw_conv_params->dilation, + dw_conv_params->activation}; + const cmsis_nn_dims filter_output_dims = {filter_dims->c, filter_dims->h, filter_dims->w, filter_dims->n}; + int8_t *w_buf = + (int8_t *)ctx->buf + arm_convolve_wrapper_s8_get_buffer_size(&conv_params, input_dims, &filter_output_dims, output_dims); + const uint32_t perm[4] = {3, 1, 2, 0}; + const cmsis_nn_transpose_params transpose_params = {4, perm}; + + arm_cmsis_nn_status status = arm_transpose_s8(filter, w_buf, filter_dims, &filter_output_dims, &transpose_params); + + if (status == ARM_CMSIS_NN_SUCCESS) + { + status = arm_convolve_wrapper_s8(ctx, + &conv_params, + quant_params, + input_dims, + input, + &filter_output_dims, + (const int8_t *)w_buf, + bias_dims, + bias, + output_dims, + output); + } + return status; +} +#endif + /* * s8 Depthwise conv wrapper function * @@ -59,6 +105,24 @@ arm_cmsis_nn_status arm_depthwise_conv_wrapper_s8(const cmsis_nn_context *ctx, int8_t *output) { arm_cmsis_nn_status status = ARM_CMSIS_NN_SUCCESS; + +#if defined(ARM_MATH_MVEI) + if 
(input_dims->c == 1 && output_dims->c > CONVERT_DW_CONV_WITH_ONE_INPUT_CH_AND_OUTPUT_CH_ABOVE_THRESHOLD) + { + return arm_depthwise_conv_to_conv_s8(ctx, + dw_conv_params, + quant_params, + input_dims, + input, + filter_dims, + filter, + bias_dims, + bias, + output_dims, + output); + } +#endif + if (1 == dw_conv_params->ch_mult && input_dims->n == 1 && dw_conv_params->dilation.w == 1 && dw_conv_params->dilation.h == 1) { diff --git a/Tests/UnitTest/TestCases/TestData/in_ch_one_out_ch_larger_one/biases_data.h b/Tests/UnitTest/TestCases/TestData/in_ch_one_out_ch_larger_one/biases_data.h new file mode 100644 index 00000000..6bad89b7 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/in_ch_one_out_ch_larger_one/biases_data.h @@ -0,0 +1,6 @@ +// Generated by test_settings.py using tensorflow version 2.17.0 (Keras version 3.5.0). +// Interpreter from tensorflow version 2.17.0 and revision v2.17.0-rc1-2-gad6d8cc177d. +#pragma once +#include <stdint.h> + +const int32_t in_ch_one_out_ch_larger_one_biases[1] = {-4565}; diff --git a/Tests/UnitTest/TestCases/TestData/in_ch_one_out_ch_larger_one/config_data.h b/Tests/UnitTest/TestCases/TestData/in_ch_one_out_ch_larger_one/config_data.h new file mode 100644 index 00000000..7da491ff --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/in_ch_one_out_ch_larger_one/config_data.h @@ -0,0 +1,25 @@ +// Generated by test_settings.py using tensorflow version 2.17.0 (Keras version 3.5.0). +// Interpreter from tensorflow version 2.17.0 and revision v2.17.0-rc1-2-gad6d8cc177d. 
+#pragma once +#define IN_CH_ONE_OUT_CH_LARGER_ONE_OUT_CH 1 +#define IN_CH_ONE_OUT_CH_LARGER_ONE_IN_CH 1 +#define IN_CH_ONE_OUT_CH_LARGER_ONE_INPUT_W 7 +#define IN_CH_ONE_OUT_CH_LARGER_ONE_INPUT_H 7 +#define IN_CH_ONE_OUT_CH_LARGER_ONE_DST_SIZE 16 +#define IN_CH_ONE_OUT_CH_LARGER_ONE_INPUT_SIZE 49 +#define IN_CH_ONE_OUT_CH_LARGER_ONE_OUT_ACTIVATION_MIN -128 +#define IN_CH_ONE_OUT_CH_LARGER_ONE_OUT_ACTIVATION_MAX 127 +#define IN_CH_ONE_OUT_CH_LARGER_ONE_INPUT_BATCHES 1 +#define IN_CH_ONE_OUT_CH_LARGER_ONE_FILTER_X 3 +#define IN_CH_ONE_OUT_CH_LARGER_ONE_FILTER_Y 3 +#define IN_CH_ONE_OUT_CH_LARGER_ONE_STRIDE_X 2 +#define IN_CH_ONE_OUT_CH_LARGER_ONE_STRIDE_Y 2 +#define IN_CH_ONE_OUT_CH_LARGER_ONE_PAD_X 1 +#define IN_CH_ONE_OUT_CH_LARGER_ONE_PAD_Y 1 +#define IN_CH_ONE_OUT_CH_LARGER_ONE_OUTPUT_W 4 +#define IN_CH_ONE_OUT_CH_LARGER_ONE_OUTPUT_H 4 +#define IN_CH_ONE_OUT_CH_LARGER_ONE_CH_MULT 1 +#define IN_CH_ONE_OUT_CH_LARGER_ONE_INPUT_OFFSET 128 +#define IN_CH_ONE_OUT_CH_LARGER_ONE_OUTPUT_OFFSET 127 +#define IN_CH_ONE_OUT_CH_LARGER_ONE_DILATION_X 1 +#define IN_CH_ONE_OUT_CH_LARGER_ONE_DILATION_Y 1 diff --git a/Tests/UnitTest/TestCases/TestData/in_ch_one_out_ch_larger_one/input_data.h b/Tests/UnitTest/TestCases/TestData/in_ch_one_out_ch_larger_one/input_data.h new file mode 100644 index 00000000..0170204b --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/in_ch_one_out_ch_larger_one/input_data.h @@ -0,0 +1,9 @@ +// Generated by test_settings.py using tensorflow version 2.17.0 (Keras version 3.5.0). +// Interpreter from tensorflow version 2.17.0 and revision v2.17.0-rc1-2-gad6d8cc177d. 
+#pragma once +#include <stdint.h> + +const int8_t in_ch_one_out_ch_larger_one_input[49] = { + -65, 36, 56, -82, 109, 99, -113, -63, 47, -83, -100, 123, 46, 125, -52, 65, 12, + -55, 11, -85, 123, 97, -55, 79, 33, 39, -39, 64, -1, 89, -8, 17, -16, -90, + -66, 58, 126, 36, -52, 46, 66, -83, -125, -93, -52, -61, -14, -62, -76}; diff --git a/Tests/UnitTest/TestCases/TestData/in_ch_one_out_ch_larger_one/output_mult_data.h b/Tests/UnitTest/TestCases/TestData/in_ch_one_out_ch_larger_one/output_mult_data.h new file mode 100644 index 00000000..9594672d --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/in_ch_one_out_ch_larger_one/output_mult_data.h @@ -0,0 +1,6 @@ +// Generated by test_settings.py using tensorflow version 2.17.0 (Keras version 3.5.0). +// Interpreter from tensorflow version 2.17.0 and revision v2.17.0-rc1-2-gad6d8cc177d. +#pragma once +#include <stdint.h> + +const int32_t in_ch_one_out_ch_larger_one_output_mult[1] = {2129586399}; diff --git a/Tests/UnitTest/TestCases/TestData/in_ch_one_out_ch_larger_one/output_ref_data.h b/Tests/UnitTest/TestCases/TestData/in_ch_one_out_ch_larger_one/output_ref_data.h new file mode 100644 index 00000000..dbe39d97 --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/in_ch_one_out_ch_larger_one/output_ref_data.h @@ -0,0 +1,7 @@ +// Generated by test_settings.py using tensorflow version 2.17.0 (Keras version 3.5.0). +// Interpreter from tensorflow version 2.17.0 and revision v2.17.0-rc1-2-gad6d8cc177d. 
+#pragma once +#include <stdint.h> + +const int8_t in_ch_one_out_ch_larger_one_output_ref[16] = + {97, 22, 11, 36, 70, 24, 5, -68, 35, -27, 33, -2, 121, 38, 72, 72}; diff --git a/Tests/UnitTest/TestCases/TestData/in_ch_one_out_ch_larger_one/output_shift_data.h b/Tests/UnitTest/TestCases/TestData/in_ch_one_out_ch_larger_one/output_shift_data.h new file mode 100644 index 00000000..30d7ab8a --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/in_ch_one_out_ch_larger_one/output_shift_data.h @@ -0,0 +1,6 @@ +// Generated by test_settings.py using tensorflow version 2.17.0 (Keras version 3.5.0). +// Interpreter from tensorflow version 2.17.0 and revision v2.17.0-rc1-2-gad6d8cc177d. +#pragma once +#include <stdint.h> + +const int32_t in_ch_one_out_ch_larger_one_output_shift[1] = {-9}; diff --git a/Tests/UnitTest/TestCases/TestData/in_ch_one_out_ch_larger_one/test_data.h b/Tests/UnitTest/TestCases/TestData/in_ch_one_out_ch_larger_one/test_data.h new file mode 100644 index 00000000..416fa68a --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/in_ch_one_out_ch_larger_one/test_data.h @@ -0,0 +1,9 @@ +// Generated by test_settings.py using tensorflow version 2.17.0 (Keras version 3.5.0). +// Interpreter from tensorflow version 2.17.0 and revision v2.17.0-rc1-2-gad6d8cc177d. +#include "biases_data.h" +#include "config_data.h" +#include "input_data.h" +#include "output_mult_data.h" +#include "output_ref_data.h" +#include "output_shift_data.h" +#include "weights_data.h" diff --git a/Tests/UnitTest/TestCases/TestData/in_ch_one_out_ch_larger_one/weights_data.h b/Tests/UnitTest/TestCases/TestData/in_ch_one_out_ch_larger_one/weights_data.h new file mode 100644 index 00000000..2d0df0ec --- /dev/null +++ b/Tests/UnitTest/TestCases/TestData/in_ch_one_out_ch_larger_one/weights_data.h @@ -0,0 +1,6 @@ +// Generated by test_settings.py using tensorflow version 2.17.0 (Keras version 3.5.0). +// Interpreter from tensorflow version 2.17.0 and revision v2.17.0-rc1-2-gad6d8cc177d. 
+#pragma once +#include <stdint.h> + +const int8_t in_ch_one_out_ch_larger_one_weights[9] = {-65, -108, 97, 1, -127, -72, -124, -76, 79}; diff --git a/Tests/UnitTest/TestCases/test_arm_depthwise_conv_s8/Unity/unity_test_arm_depthwise_conv_s8.c b/Tests/UnitTest/TestCases/test_arm_depthwise_conv_s8/Unity/unity_test_arm_depthwise_conv_s8.c index 8023f78b..80fba367 100644 --- a/Tests/UnitTest/TestCases/test_arm_depthwise_conv_s8/Unity/unity_test_arm_depthwise_conv_s8.c +++ b/Tests/UnitTest/TestCases/test_arm_depthwise_conv_s8/Unity/unity_test_arm_depthwise_conv_s8.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright 2010-2023 Arm Limited and/or its affiliates + * SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates * * SPDX-License-Identifier: Apache-2.0 * @@ -55,3 +55,8 @@ void test_depthwise_dilation_arm_depthwise_conv_s8(void) { depthwise_dilation_ar void test_buffer_size_mve_arm_depthwise_conv_s8(void) { buffer_size_mve_arm_depthwise_conv_s8(); } void test_buffer_size_dsp_arm_depthwise_conv_s8(void) { buffer_size_dsp_arm_depthwise_conv_s8(); } + +void test_in_ch_one_out_ch_larger_one_arm_depthwise_conv_s8(void) +{ + in_ch_one_out_ch_larger_one_arm_depthwise_conv_s8(); +} diff --git a/Tests/UnitTest/TestCases/test_arm_depthwise_conv_s8/test_arm_depthwise_conv_s8.c b/Tests/UnitTest/TestCases/test_arm_depthwise_conv_s8/test_arm_depthwise_conv_s8.c index 2e2d8d5f..6ab30b2b 100644 --- a/Tests/UnitTest/TestCases/test_arm_depthwise_conv_s8/test_arm_depthwise_conv_s8.c +++ b/Tests/UnitTest/TestCases/test_arm_depthwise_conv_s8/test_arm_depthwise_conv_s8.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright 2010-2023 Arm Limited and/or its affiliates + * SPDX-FileCopyrightText: Copyright 2010-2024 Arm Limited and/or its affiliates * * SPDX-License-Identifier: Apache-2.0 * @@ -23,6 +23,7 @@ #include "../TestData/depthwise_dilation/test_data.h" #include "../TestData/depthwise_mult_batches/test_data.h" #include 
"../TestData/depthwise_null_bias_1/test_data.h" +#include "../TestData/in_ch_one_out_ch_larger_one/test_data.h" #include "../Utils/utils.h" #include "../Utils/validate.h" @@ -482,3 +483,87 @@ void buffer_size_dsp_arm_depthwise_conv_s8(void) TEST_ASSERT_EQUAL(wrapper_buf_size, dsp_wrapper_buf_size); #endif } + +void in_ch_one_out_ch_larger_one_arm_depthwise_conv_s8(void) +{ + const arm_cmsis_nn_status expected = ARM_CMSIS_NN_SUCCESS; + int8_t output[IN_CH_ONE_OUT_CH_LARGER_ONE_DST_SIZE] = {0}; + cmsis_nn_context ctx; + cmsis_nn_dw_conv_params dw_conv_params; + cmsis_nn_per_channel_quant_params quant_params; + cmsis_nn_dims input_dims; + cmsis_nn_dims filter_dims; + cmsis_nn_dims bias_dims = {}; + cmsis_nn_dims output_dims; + const int32_t output_ref_size = IN_CH_ONE_OUT_CH_LARGER_ONE_DST_SIZE; + const int32_t *bias_data = get_bias_address(in_ch_one_out_ch_larger_one_biases, IN_CH_ONE_OUT_CH_LARGER_ONE_OUT_CH); + const int8_t *kernel_data = in_ch_one_out_ch_larger_one_weights; + const int8_t *input_data = in_ch_one_out_ch_larger_one_input; + input_dims.n = IN_CH_ONE_OUT_CH_LARGER_ONE_INPUT_BATCHES; + input_dims.w = IN_CH_ONE_OUT_CH_LARGER_ONE_INPUT_W; + input_dims.h = IN_CH_ONE_OUT_CH_LARGER_ONE_INPUT_H; + input_dims.c = IN_CH_ONE_OUT_CH_LARGER_ONE_IN_CH; + filter_dims.n = IN_CH_ONE_OUT_CH_LARGER_ONE_IN_CH; + filter_dims.w = IN_CH_ONE_OUT_CH_LARGER_ONE_FILTER_X; + filter_dims.h = IN_CH_ONE_OUT_CH_LARGER_ONE_FILTER_Y; + filter_dims.c = IN_CH_ONE_OUT_CH_LARGER_ONE_OUT_CH; + output_dims.w = IN_CH_ONE_OUT_CH_LARGER_ONE_OUTPUT_W; + output_dims.h = IN_CH_ONE_OUT_CH_LARGER_ONE_OUTPUT_H; + output_dims.c = IN_CH_ONE_OUT_CH_LARGER_ONE_OUT_CH; + dw_conv_params.padding.w = IN_CH_ONE_OUT_CH_LARGER_ONE_PAD_X; + dw_conv_params.padding.h = IN_CH_ONE_OUT_CH_LARGER_ONE_PAD_Y; + dw_conv_params.stride.w = IN_CH_ONE_OUT_CH_LARGER_ONE_STRIDE_X; + dw_conv_params.stride.h = IN_CH_ONE_OUT_CH_LARGER_ONE_STRIDE_Y; + dw_conv_params.dilation.w = IN_CH_ONE_OUT_CH_LARGER_ONE_DILATION_X; + 
dw_conv_params.dilation.h = IN_CH_ONE_OUT_CH_LARGER_ONE_DILATION_Y; + dw_conv_params.ch_mult = IN_CH_ONE_OUT_CH_LARGER_ONE_CH_MULT; + dw_conv_params.input_offset = IN_CH_ONE_OUT_CH_LARGER_ONE_INPUT_OFFSET; + dw_conv_params.output_offset = IN_CH_ONE_OUT_CH_LARGER_ONE_OUTPUT_OFFSET; + dw_conv_params.activation.min = IN_CH_ONE_OUT_CH_LARGER_ONE_OUT_ACTIVATION_MIN; + dw_conv_params.activation.max = IN_CH_ONE_OUT_CH_LARGER_ONE_OUT_ACTIVATION_MAX; + quant_params.multiplier = (int32_t *)in_ch_one_out_ch_larger_one_output_mult; + quant_params.shift = (int32_t *)in_ch_one_out_ch_larger_one_output_shift; + ctx.buf = NULL; + ctx.size = 0; + arm_cmsis_nn_status result = arm_depthwise_conv_s8(&ctx, + &dw_conv_params, + &quant_params, + &input_dims, + input_data, + &filter_dims, + kernel_data, + &bias_dims, + bias_data, + &output_dims, + output); + if (ctx.buf) + { + memset(ctx.buf, 0, ctx.size); + free(ctx.buf); + } + TEST_ASSERT_EQUAL(expected, result); + TEST_ASSERT_TRUE(validate(output, in_ch_one_out_ch_larger_one_output_ref, output_ref_size)); + memset(output, 0, sizeof(output)); + const int32_t buf_size = + arm_depthwise_conv_wrapper_s8_get_buffer_size(&dw_conv_params, &input_dims, &filter_dims, &output_dims); + ctx.buf = malloc(buf_size); + ctx.size = buf_size; + result = arm_depthwise_conv_wrapper_s8(&ctx, + &dw_conv_params, + &quant_params, + &input_dims, + input_data, + &filter_dims, + kernel_data, + &bias_dims, + bias_data, + &output_dims, + output); + if (ctx.buf) + { + memset(ctx.buf, 0, buf_size); + free(ctx.buf); + } + TEST_ASSERT_EQUAL(expected, result); + TEST_ASSERT_TRUE(validate(output, in_ch_one_out_ch_larger_one_output_ref, output_ref_size)); +} diff --git a/Tests/UnitTest/generate_test_data.py b/Tests/UnitTest/generate_test_data.py index d5981c8c..b56e5cba 100755 --- a/Tests/UnitTest/generate_test_data.py +++ b/Tests/UnitTest/generate_test_data.py @@ -1619,7 +1619,7 @@ def load_testdata_sets(regenerate_input, regenerate_weights, regenerate_biases, 
interpreter=interpreter) type_of_test = 'depthwise_conv' - dataset = 'in_ch_one_out_ch_plus_eight' + dataset = 'in_ch_one_out_ch_larger_one' testdata_sets[dataset] = ConvSettings(dataset, type_of_test, regenerate_weights,