From 97ff5919e40d8eda2ab3aeb99b56caa18b86a876 Mon Sep 17 00:00:00 2001 From: nihui Date: Sat, 16 Nov 2024 19:15:55 +0800 Subject: [PATCH] port xtheadvector recp rsqrt trunc --- src/layer/riscv/gru_riscv.cpp | 2 ++ src/layer/riscv/rvv_mathfun.h | 6 +++--- src/layer/riscv/rvv_mathfun_fp16s.h | 6 +++--- src/layer/riscv/unaryop_riscv.cpp | 8 ++++---- src/layer/riscv/unaryop_riscv_zvfh.cpp | 8 ++++---- toolchains/riscv64-unknown-linux-gnu.llvm-toolchain.cmake | 4 ++-- 6 files changed, 18 insertions(+), 16 deletions(-) diff --git a/src/layer/riscv/gru_riscv.cpp b/src/layer/riscv/gru_riscv.cpp index 47f6dee9118..46254d70b96 100644 --- a/src/layer/riscv/gru_riscv.cpp +++ b/src/layer/riscv/gru_riscv.cpp @@ -18,6 +18,8 @@ #include #endif // __riscv_vector +#include "cpu.h" + namespace ncnn { //core rvv-optimized gru impl. diff --git a/src/layer/riscv/rvv_mathfun.h b/src/layer/riscv/rvv_mathfun.h index 1f64f5a3a5a..107492c832b 100644 --- a/src/layer/riscv/rvv_mathfun.h +++ b/src/layer/riscv/rvv_mathfun.h @@ -365,7 +365,7 @@ _RVV_FLOAT32_POW_OP(2, 16) _RVV_FLOAT32_POW_OP(4, 8) _RVV_FLOAT32_POW_OP(8, 4) -#if C906 +#if __riscv_xtheadvector #define _RVV_FLOAT32_SIGMOID_OP(LMUL, MLEN) \ static inline vfloat32m##LMUL##_t sigmoid_ps(vfloat32m##LMUL##_t _v, size_t vl) \ { \ @@ -377,7 +377,7 @@ _RVV_FLOAT32_POW_OP(8, 4) /* _reciprocal = __riscv_vfmul_vv_f32m##LMUL(__riscv_vfrsub_vf_f32m##LMUL(__riscv_vfmul_vv_f32m##LMUL(_v, _reciprocal, vl), 2.f, vl), _reciprocal, vl); */ \ return _reciprocal; \ } -#else // C906 +#else // __riscv_xtheadvector #define _RVV_FLOAT32_SIGMOID_OP(LMUL, MLEN) \ static inline vfloat32m##LMUL##_t sigmoid_ps(vfloat32m##LMUL##_t _v, size_t vl) \ { \ @@ -389,7 +389,7 @@ _RVV_FLOAT32_POW_OP(8, 4) /* _reciprocal = __riscv_vfmul_vv_f32m##LMUL(__riscv_vfrsub_vf_f32m##LMUL(__riscv_vfmul_vv_f32m##LMUL(_v, _reciprocal, vl), 2.f, vl), _reciprocal, vl); */ \ return _reciprocal; \ } -#endif // C906 +#endif // __riscv_xtheadvector _RVV_FLOAT32_SIGMOID_OP(1, 32) _RVV_FLOAT32_SIGMOID_OP(2, 16) diff --git a/src/layer/riscv/rvv_mathfun_fp16s.h b/src/layer/riscv/rvv_mathfun_fp16s.h index f005e521ac7..ade4b05f237 100644 --- a/src/layer/riscv/rvv_mathfun_fp16s.h +++ b/src/layer/riscv/rvv_mathfun_fp16s.h @@ -365,7 +365,7 @@ _RVV_FLOAT16_POW_OP(2, 8) _RVV_FLOAT16_POW_OP(4, 4) _RVV_FLOAT16_POW_OP(8, 2) -#if C906 +#if __riscv_xtheadvector #define _RVV_FLOAT16_SIGMOID_OP(LMUL, MLEN) \ static inline vfloat16m##LMUL##_t sigmoid_ps(vfloat16m##LMUL##_t _v, size_t vl) \ { \ @@ -377,7 +377,7 @@ _RVV_FLOAT16_POW_OP(8, 2) /* _reciprocal = __riscv_vfmul_vv_f16m##LMUL(__riscv_vfrsub_vf_f16m##LMUL(__riscv_vfmul_vv_f16m##LMUL(_v, _reciprocal, vl), 2.f, vl), _reciprocal, vl); */ \ return _reciprocal; \ } -#else // C906 +#else // __riscv_xtheadvector #define _RVV_FLOAT16_SIGMOID_OP(LMUL, MLEN) \ static inline vfloat16m##LMUL##_t sigmoid_ps(vfloat16m##LMUL##_t _v, size_t vl) \ { \ @@ -389,7 +389,7 @@ _RVV_FLOAT16_POW_OP(8, 2) /* _reciprocal = __riscv_vfmul_vv_f16m##LMUL(__riscv_vfrsub_vf_f16m##LMUL(__riscv_vfmul_vv_f16m##LMUL(_v, _reciprocal, vl), 2.f, vl), _reciprocal, vl); */ \ return _reciprocal; \ } -#endif // C906 +#endif // __riscv_xtheadvector _RVV_FLOAT16_SIGMOID_OP(1, 16) _RVV_FLOAT16_SIGMOID_OP(2, 8) diff --git a/src/layer/riscv/unaryop_riscv.cpp b/src/layer/riscv/unaryop_riscv.cpp index 82170d94f27..58af0802d33 100644 --- a/src/layer/riscv/unaryop_riscv.cpp +++ b/src/layer/riscv/unaryop_riscv.cpp @@ -120,7 +120,7 @@ struct unary_op_rsqrt { vfloat32m8_t operator()(const vfloat32m8_t& x, const size_t& vl) const { -#if C906 +#if __riscv_xtheadvector vfloat32m8_t _reciprocal = __riscv_vfrdiv_vf_f32m8(__riscv_vfsqrt_v_f32m8(x, vl), 1.f, vl); #else vfloat32m8_t _reciprocal = __riscv_vfrsqrt7_v_f32m8(x, vl); @@ -227,7 +227,7 @@ struct unary_op_reciprocal { vfloat32m8_t operator()(const vfloat32m8_t& x, const size_t& vl) const { -#if C906 +#if __riscv_xtheadvector vfloat32m8_t _reciprocal = __riscv_vfrdiv_vf_f32m8(x, 1.f, vl); #else vfloat32m8_t _reciprocal = __riscv_vfrec7_v_f32m8(x, vl); @@ -266,7 +266,7 @@ struct unary_op_trunc { vfloat32m8_t operator()(const vfloat32m8_t& x, const size_t& vl) const { -#if C906 +#if __riscv_xtheadvector // simulate trunc with floor positives and ceil negative // xi = round(x) // floorx = xi - (xi > x) @@ -279,7 +279,7 @@ struct unary_op_trunc vbool4_t _ceilmask = __riscv_vmflt_vv_f32m8_b4(_xf, x, vl); vint32m8_t _ceilx = __riscv_vadd_vx_i32m8_mu(_ceilmask, _xi, _xi, 1, vl); vbool4_t _negative = __riscv_vmflt_vf_f32m8_b4(x, 0.f, vl); - return __riscv_vfcvt_f_x_v_f32m8(__riscv_vmerge_vvm_i32m8(_negative, _floorx, _ceilx, vl), vl); + return __riscv_vfcvt_f_x_v_f32m8(__riscv_vmerge_vvm_i32m8(_floorx, _ceilx, _negative, vl), vl); #else return __riscv_vfcvt_f_x_v_f32m8(__riscv_vfcvt_rtz_x_f_v_i32m8(x, vl), vl); #endif diff --git a/src/layer/riscv/unaryop_riscv_zvfh.cpp b/src/layer/riscv/unaryop_riscv_zvfh.cpp index c2611dc9907..e706002dd3c 100644 --- a/src/layer/riscv/unaryop_riscv_zvfh.cpp +++ b/src/layer/riscv/unaryop_riscv_zvfh.cpp @@ -113,7 +113,7 @@ struct unary_op_rsqrt_fp16s { vfloat16m8_t operator()(const vfloat16m8_t& x, const size_t& vl) const { -#if C906 +#if __riscv_xtheadvector vfloat16m8_t _reciprocal = __riscv_vfrdiv_vf_f16m8(__riscv_vfsqrt_v_f16m8(x, vl), 1.f, vl); #else vfloat16m8_t _reciprocal = __riscv_vfrsqrt7_v_f16m8(x, vl); @@ -220,7 +220,7 @@ struct unary_op_reciprocal_fp16s { vfloat16m8_t operator()(const vfloat16m8_t& x, const size_t& vl) const { -#if C906 +#if __riscv_xtheadvector vfloat16m8_t _reciprocal = __riscv_vfrdiv_vf_f16m8(x, 1.f, vl); #else vfloat16m8_t _reciprocal = __riscv_vfrec7_v_f16m8(x, vl); @@ -259,7 +259,7 @@ struct unary_op_trunc_fp16s { vfloat16m8_t operator()(const vfloat16m8_t& x, const size_t& vl) const { -#if C906 +#if __riscv_xtheadvector // simulate trunc with floor positives and ceil negative // xi = round(x) // floorx = xi - (xi > x) @@ -272,7 +272,7 @@ struct unary_op_trunc_fp16s vbool2_t _ceilmask = __riscv_vmflt_vv_f16m8_b2(_xf, x, vl); vint16m8_t _ceilx = __riscv_vadd_vx_i16m8_mu(_ceilmask, _xi, _xi, 1, vl); vbool2_t _negative = __riscv_vmflt_vf_f16m8_b2(x, 0.f, vl); - return __riscv_vfcvt_f_x_v_f16m8(__riscv_vmerge_vvm_i16m8(_negative, _floorx, _ceilx, vl), vl); + return __riscv_vfcvt_f_x_v_f16m8(__riscv_vmerge_vvm_i16m8(_floorx, _ceilx, _negative, vl), vl); #else return __riscv_vfcvt_f_x_v_f16m8(__riscv_vfcvt_rtz_x_f_v_i16m8(x, vl), vl); #endif diff --git a/toolchains/riscv64-unknown-linux-gnu.llvm-toolchain.cmake b/toolchains/riscv64-unknown-linux-gnu.llvm-toolchain.cmake index e9c6ad29e2a..c138edc76c1 100644 --- a/toolchains/riscv64-unknown-linux-gnu.llvm-toolchain.cmake +++ b/toolchains/riscv64-unknown-linux-gnu.llvm-toolchain.cmake @@ -9,8 +9,8 @@ endif() set(RISCV_ROOT_PATH ${RISCV_ROOT_PATH} CACHE STRING "root path to riscv gnu toolchain") -set(CMAKE_C_COMPILER "clang") -set(CMAKE_CXX_COMPILER "clang++") +set(CMAKE_C_COMPILER "${RISCV_ROOT_PATH}/bin/clang") +set(CMAKE_CXX_COMPILER "${RISCV_ROOT_PATH}/bin/clang++") set(CMAKE_SYSROOT "${RISCV_ROOT_PATH}/sysroot") set(CMAKE_C_COMPILER_TARGET "riscv64-unknown-linux-gnu")