diff --git a/src/cpu.cpp b/src/cpu.cpp
index d624286c0c1..79497318c1f 100644
--- a/src/cpu.cpp
+++ b/src/cpu.cpp
@@ -129,9 +129,6 @@ static ncnn::CpuSet g_cpu_affinity_mask_big;
 
 // isa info
 #if defined _WIN32
-#if __arm__
-static int g_cpu_support_arm_neon;
-static int g_cpu_support_arm_vfpv4;
 #if __aarch64__
 static int g_cpu_support_arm_asimdhp;
 static int g_cpu_support_arm_cpuid;
@@ -144,10 +141,11 @@ static int g_cpu_support_arm_sve2;
 static int g_cpu_support_arm_svebf16;
 static int g_cpu_support_arm_svei8mm;
 static int g_cpu_support_arm_svef32mm;
-#else // __aarch64__
+#elif __arm__
 static int g_cpu_support_arm_edsp;
-#endif // __aarch64__
-#endif // __arm__
+static int g_cpu_support_arm_neon;
+static int g_cpu_support_arm_vfpv4;
+#endif // __aarch64__ || __arm__
 #elif defined __ANDROID__ || defined __linux__
 static unsigned int g_hwcaps;
 static unsigned int g_hwcaps2;
@@ -2040,9 +2038,6 @@ static void initialize_global_cpu_info()
     g_is_being_debugged = is_being_debugged();
 
 #if defined _WIN32
-#if __arm__
-    g_cpu_support_arm_neon = 1; // all modern windows arm devices have neon
-    g_cpu_support_arm_vfpv4 = detectisa(some_vfpv4);
 #if __aarch64__
     g_cpu_support_arm_cpuid = detectisa(some_cpuid);
     g_cpu_support_arm_asimdhp = detectisa(some_asimdhp) || IsProcessorFeaturePresent(43); // dp implies hp
@@ -2055,10 +2050,11 @@ static void initialize_global_cpu_info()
     g_cpu_support_arm_svebf16 = detectisa(some_svebf16);
     g_cpu_support_arm_svei8mm = detectisa(some_svei8mm);
     g_cpu_support_arm_svef32mm = detectisa(some_svef32mm);
-#else // __aarch64__
+#elif __arm__
     g_cpu_support_arm_edsp = detectisa(some_edsp);
-#endif // __aarch64__
-#endif // __arm__
+    g_cpu_support_arm_neon = 1; // all modern windows arm devices have neon
+    g_cpu_support_arm_vfpv4 = detectisa(some_vfpv4);
+#endif // __aarch64__ || __arm__
 #elif defined __ANDROID__ || defined __linux__
     g_hwcaps = get_elf_hwcap(AT_HWCAP);
     g_hwcaps2 = get_elf_hwcap(AT_HWCAP2);
@@ -2271,21 +2267,15 @@ int cpu_support_arm_edsp()
 int cpu_support_arm_neon()
 {
     try_initialize_global_cpu_info();
-#if __arm__
+#if __aarch64__
+    return 1;
+#elif __arm__
 #if defined _WIN32
     return g_cpu_support_arm_neon;
 #elif defined __ANDROID__ || defined __linux__
-#if __aarch64__
-    return g_hwcaps & HWCAP_ASIMD;
-#else
     return g_hwcaps & HWCAP_NEON;
-#endif
 #elif __APPLE__
-#if __aarch64__
-    return g_hw_cputype == CPU_TYPE_ARM64;
-#else
     return g_hw_cputype == CPU_TYPE_ARM && g_hw_cpusubtype > CPU_SUBTYPE_ARM_V7;
-#endif
 #else
     return 0;
 #endif
@@ -2297,22 +2287,15 @@ int cpu_support_arm_neon()
 int cpu_support_arm_vfpv4()
 {
     try_initialize_global_cpu_info();
-#if __arm__
+#if __aarch64__
+    return 1;
+#elif __arm__
 #if defined _WIN32
     return g_cpu_support_arm_vfpv4;
 #elif defined __ANDROID__ || defined __linux__
-#if __aarch64__
-    // neon always enable fma and fp16
-    return g_hwcaps & HWCAP_ASIMD;
-#else
     return g_hwcaps & HWCAP_VFPv4;
-#endif
 #elif __APPLE__
-#if __aarch64__
-    return g_hw_cputype == CPU_TYPE_ARM64;
-#else
     return g_hw_cputype == CPU_TYPE_ARM && g_hw_cpusubtype > CPU_SUBTYPE_ARM_V7S;
-#endif
 #else
     return 0;
 #endif
diff --git a/src/net.cpp b/src/net.cpp
index 971a1b4276e..ff2ab609137 100644
--- a/src/net.cpp
+++ b/src/net.cpp
@@ -621,15 +621,15 @@ int NetPrivate::convert_layout(Mat& bottom_blob, const Layer* layer, const Optio
 
     // clang-format off
     // *INDENT-OFF*
-#if NCNN_ARM82
-    if (opt.use_fp16_storage && cpu_support_arm_asimdhp() && layer->support_fp16_storage)
+#if NCNN_VFPV4
+    if (opt.use_fp16_storage && cpu_support_arm_vfpv4() && layer->support_fp16_storage)
     {
         Mat bottom_blob_fp16;
         cast_float32_to_float16(bottom_blob, bottom_blob_fp16, opt);
         bottom_blob = bottom_blob_fp16;
     }
     else
-#endif // NCNN_ARM82
+#endif // NCNN_VFPV4
 #if NCNN_RVV
     if (opt.use_fp16_storage && cpu_support_riscv_v() && cpu_support_riscv_zfh() && layer->support_fp16_storage)
     {
@@ -731,15 +731,15 @@ int NetPrivate::convert_layout(Mat& bottom_blob, const Layer* layer, const Optio
 
     // clang-format off
     // *INDENT-OFF*
-#if NCNN_ARM82
-    if (opt.use_fp16_storage && cpu_support_arm_asimdhp() && !layer->support_fp16_storage)
+#if NCNN_VFPV4
+    if (opt.use_fp16_storage && cpu_support_arm_vfpv4() && !layer->support_fp16_storage)
     {
         Mat bottom_blob_fp32;
         cast_float16_to_float32(bottom_blob, bottom_blob_fp32, opt);
         bottom_blob = bottom_blob_fp32;
     }
     else
-#endif // NCNN_ARM82
+#endif // NCNN_VFPV4
 #if NCNN_RVV
     if (opt.use_fp16_storage && cpu_support_riscv_v() && cpu_support_riscv_zfh() && !layer->support_fp16_storage)
     {
@@ -2691,8 +2691,8 @@ int Extractor::extract(int blob_index, Mat& feat, int type)
 
         // clang-format off
         // *INDENT-OFF*
-#if NCNN_ARM82
-        if (d->opt.use_fp16_storage && cpu_support_arm_asimdhp() && (type == 0))
+#if NCNN_VFPV4
+        if (d->opt.use_fp16_storage && cpu_support_arm_vfpv4() && (type == 0))
         {
             if (feat.elembits() == 16)
             {
@@ -2702,7 +2702,7 @@ int Extractor::extract(int blob_index, Mat& feat, int type)
             }
         }
         else
-#endif // NCNN_ARM82
+#endif // NCNN_VFPV4
 #if NCNN_BF16
         if (d->opt.use_bf16_storage && (type == 0))
         {
diff --git a/tests/testutil.cpp b/tests/testutil.cpp
index b453d1f61b4..f0bf3c51a20 100644
--- a/tests/testutil.cpp
+++ b/tests/testutil.cpp
@@ -446,13 +446,13 @@ int test_layer_cpu(int typeindex, const ncnn::ParamDict& pd, const std::vector
     {
         // clang-format off
         // *INDENT-OFF*
-#if NCNN_ARM82
-        if (opt.use_fp16_storage && ncnn::cpu_support_arm_asimdhp() && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
+#if NCNN_VFPV4
+        if (opt.use_fp16_storage && ncnn::cpu_support_arm_vfpv4() && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
         {
             ncnn::cast_float32_to_float16(a[i], a4[i], opt);
         }
         else
-#endif // NCNN_ARM82
+#endif // NCNN_VFPV4
 #if NCNN_RVV
         if (opt.use_fp16_storage && ncnn::cpu_support_riscv_v() && ncnn::cpu_support_riscv_zfh() && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
         {
@@ -571,15 +571,15 @@ int test_layer_cpu(int typeindex, const ncnn::ParamDict& pd, const std::vector
     {
         // clang-format off
         // *INDENT-OFF*
-#if NCNN_ARM82
-        if (opt.use_fp16_storage && ncnn::cpu_support_arm_asimdhp() && op->support_fp16_storage && c[i].elembits() == 16)
+#if NCNN_VFPV4
+        if (opt.use_fp16_storage && ncnn::cpu_support_arm_vfpv4() && op->support_fp16_storage && c[i].elembits() == 16)
         {
             ncnn::Mat c_fp32;
             ncnn::cast_float16_to_float32(c[i], c_fp32, opt);
             c[i] = c_fp32;
         }
         else
-#endif // NCNN_ARM82
+#endif // NCNN_VFPV4
 #if NCNN_RVV
         if (opt.use_fp16_storage && ncnn::cpu_support_riscv_v() && ncnn::cpu_support_riscv_zfh() && op->support_fp16_storage && c[i].elembits() == 16)
         {
@@ -961,13 +961,13 @@ int test_layer_cpu(int typeindex, const ncnn::ParamDict& pd, const std::vector
 
     // clang-format off
     // *INDENT-OFF*
-#if NCNN_ARM82
-    if (opt.use_fp16_storage && ncnn::cpu_support_arm_asimdhp() && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
+#if NCNN_VFPV4
+    if (opt.use_fp16_storage && ncnn::cpu_support_arm_vfpv4() && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
     {
         ncnn::cast_float32_to_float16(a, a4, opt);
     }
     else
-#endif // NCNN_ARM82
+#endif // NCNN_VFPV4
 #if NCNN_RVV
     if (opt.use_fp16_storage && ncnn::cpu_support_riscv_v() && ncnn::cpu_support_riscv_zfh() && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
     {
@@ -1077,15 +1077,15 @@ int test_layer_cpu(int typeindex, const ncnn::ParamDict& pd, const std::vector
 
     // clang-format off
     // *INDENT-OFF*
-#if NCNN_ARM82
-    if (opt.use_fp16_storage && ncnn::cpu_support_arm_asimdhp() && op->support_fp16_storage && c.elembits() == 16)
+#if NCNN_VFPV4
+    if (opt.use_fp16_storage && ncnn::cpu_support_arm_vfpv4() && op->support_fp16_storage && c.elembits() == 16)
     {
         ncnn::Mat c_fp32;
         ncnn::cast_float16_to_float32(c, c_fp32, opt);
         c = c_fp32;
     }
     else
-#endif // NCNN_ARM82
+#endif // NCNN_VFPV4
 #if NCNN_RVV
     if (opt.use_fp16_storage && ncnn::cpu_support_riscv_v() && ncnn::cpu_support_riscv_zfh() && op->support_fp16_storage && c.elembits() == 16)
     {
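
Not part of the patch itself, but a minimal standalone sketch of the gate that net.cpp and testutil.cpp now share: the fp16-storage cast is keyed on cpu_support_arm_vfpv4() under NCNN_VFPV4 rather than cpu_support_arm_asimdhp() under NCNN_ARM82. The include paths, blob shape and main() wrapper are illustrative assumptions; in-tree code includes "cpu.h"/"mat.h" directly and also checks the per-layer support_fp16_storage flag, which is omitted here.

// Standalone sketch (illustrative, not from the patch).
#include <ncnn/cpu.h>    // ncnn::cpu_support_arm_vfpv4()
#include <ncnn/mat.h>    // ncnn::Mat, ncnn::cast_float32_to_float16()
#include <ncnn/option.h> // ncnn::Option

#include <cstdio>

int main()
{
    ncnn::Option opt;
    opt.use_fp16_storage = true;

    ncnn::Mat blob(224, 224, 3); // fp32 storage, elembits() == 32
    blob.fill(1.f);

    // With this patch cpu_support_arm_vfpv4() returns 1 unconditionally on aarch64
    // and checks the VFPv4 capability bits on 32-bit ARM, so fp16 storage no longer
    // requires asimdhp.
    if (opt.use_fp16_storage && ncnn::cpu_support_arm_vfpv4())
    {
        ncnn::Mat blob_fp16;
        ncnn::cast_float32_to_float16(blob, blob_fp16, opt);
        blob = blob_fp16; // elembits() == 16 from here on
    }

    printf("blob elembits = %d\n", (int)blob.elembits());
    return 0;
}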