Skip to content

Commit

Permalink
port xtheadvector recp rsqrt trunc
Browse files Browse the repository at this point in the history
  • Loading branch information
nihui committed Nov 16, 2024
1 parent 2a462fe commit 97ff591
Show file tree
Hide file tree
Showing 6 changed files with 18 additions and 16 deletions.
2 changes: 2 additions & 0 deletions src/layer/riscv/gru_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
#include <riscv_vector.h>
#endif // __riscv_vector

#include "cpu.h"

namespace ncnn {

//core rvv-optimized gru impl.
Expand Down
6 changes: 3 additions & 3 deletions src/layer/riscv/rvv_mathfun.h
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ _RVV_FLOAT32_POW_OP(2, 16)
_RVV_FLOAT32_POW_OP(4, 8)
_RVV_FLOAT32_POW_OP(8, 4)

#if C906
#if __riscv_xtheadvector
#define _RVV_FLOAT32_SIGMOID_OP(LMUL, MLEN) \
static inline vfloat32m##LMUL##_t sigmoid_ps(vfloat32m##LMUL##_t _v, size_t vl) \
{ \
Expand All @@ -377,7 +377,7 @@ _RVV_FLOAT32_POW_OP(8, 4)
/* _reciprocal = __riscv_vfmul_vv_f32m##LMUL(__riscv_vfrsub_vf_f32m##LMUL(__riscv_vfmul_vv_f32m##LMUL(_v, _reciprocal, vl), 2.f, vl), _reciprocal, vl); */ \
return _reciprocal; \
}
#else // C906
#else // __riscv_xtheadvector
#define _RVV_FLOAT32_SIGMOID_OP(LMUL, MLEN) \
static inline vfloat32m##LMUL##_t sigmoid_ps(vfloat32m##LMUL##_t _v, size_t vl) \
{ \
Expand All @@ -389,7 +389,7 @@ _RVV_FLOAT32_POW_OP(8, 4)
/* _reciprocal = __riscv_vfmul_vv_f32m##LMUL(__riscv_vfrsub_vf_f32m##LMUL(__riscv_vfmul_vv_f32m##LMUL(_v, _reciprocal, vl), 2.f, vl), _reciprocal, vl); */ \
return _reciprocal; \
}
#endif // C906
#endif // __riscv_xtheadvector

_RVV_FLOAT32_SIGMOID_OP(1, 32)
_RVV_FLOAT32_SIGMOID_OP(2, 16)
Expand Down
6 changes: 3 additions & 3 deletions src/layer/riscv/rvv_mathfun_fp16s.h
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ _RVV_FLOAT16_POW_OP(2, 8)
_RVV_FLOAT16_POW_OP(4, 4)
_RVV_FLOAT16_POW_OP(8, 2)

#if C906
#if __riscv_xtheadvector
#define _RVV_FLOAT16_SIGMOID_OP(LMUL, MLEN) \
static inline vfloat16m##LMUL##_t sigmoid_ps(vfloat16m##LMUL##_t _v, size_t vl) \
{ \
Expand All @@ -377,7 +377,7 @@ _RVV_FLOAT16_POW_OP(8, 2)
/* _reciprocal = __riscv_vfmul_vv_f16m##LMUL(__riscv_vfrsub_vf_f16m##LMUL(__riscv_vfmul_vv_f16m##LMUL(_v, _reciprocal, vl), 2.f, vl), _reciprocal, vl); */ \
return _reciprocal; \
}
#else // C906
#else // __riscv_xtheadvector
#define _RVV_FLOAT16_SIGMOID_OP(LMUL, MLEN) \
static inline vfloat16m##LMUL##_t sigmoid_ps(vfloat16m##LMUL##_t _v, size_t vl) \
{ \
Expand All @@ -389,7 +389,7 @@ _RVV_FLOAT16_POW_OP(8, 2)
/* _reciprocal = __riscv_vfmul_vv_f16m##LMUL(__riscv_vfrsub_vf_f16m##LMUL(__riscv_vfmul_vv_f16m##LMUL(_v, _reciprocal, vl), 2.f, vl), _reciprocal, vl); */ \
return _reciprocal; \
}
#endif // C906
#endif // __riscv_xtheadvector

_RVV_FLOAT16_SIGMOID_OP(1, 16)
_RVV_FLOAT16_SIGMOID_OP(2, 8)
Expand Down
8 changes: 4 additions & 4 deletions src/layer/riscv/unaryop_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ struct unary_op_rsqrt
{
vfloat32m8_t operator()(const vfloat32m8_t& x, const size_t& vl) const
{
#if C906
#if __riscv_xtheadvector
vfloat32m8_t _reciprocal = __riscv_vfrdiv_vf_f32m8(__riscv_vfsqrt_v_f32m8(x, vl), 1.f, vl);
#else
vfloat32m8_t _reciprocal = __riscv_vfrsqrt7_v_f32m8(x, vl);
Expand Down Expand Up @@ -227,7 +227,7 @@ struct unary_op_reciprocal
{
vfloat32m8_t operator()(const vfloat32m8_t& x, const size_t& vl) const
{
#if C906
#if __riscv_xtheadvector
vfloat32m8_t _reciprocal = __riscv_vfrdiv_vf_f32m8(x, 1.f, vl);
#else
vfloat32m8_t _reciprocal = __riscv_vfrec7_v_f32m8(x, vl);
Expand Down Expand Up @@ -266,7 +266,7 @@ struct unary_op_trunc
{
vfloat32m8_t operator()(const vfloat32m8_t& x, const size_t& vl) const
{
#if C906
#if __riscv_xtheadvector
// simulate trunc with floor positives and ceil negative
// xi = round(x)
// floorx = xi - (xi > x)
Expand All @@ -279,7 +279,7 @@ struct unary_op_trunc
vbool4_t _ceilmask = __riscv_vmflt_vv_f32m8_b4(_xf, x, vl);
vint32m8_t _ceilx = __riscv_vadd_vx_i32m8_mu(_ceilmask, _xi, _xi, 1, vl);
vbool4_t _negative = __riscv_vmflt_vf_f32m8_b4(x, 0.f, vl);
return __riscv_vfcvt_f_x_v_f32m8(__riscv_vmerge_vvm_i32m8(_negative, _floorx, _ceilx, vl), vl);
return __riscv_vfcvt_f_x_v_f32m8(__riscv_vmerge_vvm_i32m8(_floorx, _ceilx, _negative, vl), vl);
#else
return __riscv_vfcvt_f_x_v_f32m8(__riscv_vfcvt_rtz_x_f_v_i32m8(x, vl), vl);
#endif
Expand Down
8 changes: 4 additions & 4 deletions src/layer/riscv/unaryop_riscv_zvfh.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ struct unary_op_rsqrt_fp16s
{
vfloat16m8_t operator()(const vfloat16m8_t& x, const size_t& vl) const
{
#if C906
#if __riscv_xtheadvector
vfloat16m8_t _reciprocal = __riscv_vfrdiv_vf_f16m8(__riscv_vfsqrt_v_f16m8(x, vl), 1.f, vl);
#else
vfloat16m8_t _reciprocal = __riscv_vfrsqrt7_v_f16m8(x, vl);
Expand Down Expand Up @@ -220,7 +220,7 @@ struct unary_op_reciprocal_fp16s
{
vfloat16m8_t operator()(const vfloat16m8_t& x, const size_t& vl) const
{
#if C906
#if __riscv_xtheadvector
vfloat16m8_t _reciprocal = __riscv_vfrdiv_vf_f16m8(x, 1.f, vl);
#else
vfloat16m8_t _reciprocal = __riscv_vfrec7_v_f16m8(x, vl);
Expand Down Expand Up @@ -259,7 +259,7 @@ struct unary_op_trunc_fp16s
{
vfloat16m8_t operator()(const vfloat16m8_t& x, const size_t& vl) const
{
#if C906
#if __riscv_xtheadvector
// simulate trunc with floor positives and ceil negative
// xi = round(x)
// floorx = xi - (xi > x)
Expand All @@ -272,7 +272,7 @@ struct unary_op_trunc_fp16s
vbool2_t _ceilmask = __riscv_vmflt_vv_f16m8_b2(_xf, x, vl);
vint16m8_t _ceilx = __riscv_vadd_vx_i16m8_mu(_ceilmask, _xi, _xi, 1, vl);
vbool2_t _negative = __riscv_vmflt_vf_f16m8_b2(x, 0.f, vl);
return __riscv_vfcvt_f_x_v_f16m8(__riscv_vmerge_vvm_i16m8(_negative, _floorx, _ceilx, vl), vl);
return __riscv_vfcvt_f_x_v_f16m8(__riscv_vmerge_vvm_i16m8(_floorx, _ceilx, _negative, vl), vl);
#else
return __riscv_vfcvt_f_x_v_f16m8(__riscv_vfcvt_rtz_x_f_v_i16m8(x, vl), vl);
#endif
Expand Down
4 changes: 2 additions & 2 deletions toolchains/riscv64-unknown-linux-gnu.llvm-toolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ endif()

set(RISCV_ROOT_PATH ${RISCV_ROOT_PATH} CACHE STRING "root path to riscv gnu toolchain")

set(CMAKE_C_COMPILER "clang")
set(CMAKE_CXX_COMPILER "clang++")
set(CMAKE_C_COMPILER "${RISCV_ROOT_PATH}/bin/clang")
set(CMAKE_CXX_COMPILER "${RISCV_ROOT_PATH}/bin/clang++")
set(CMAKE_SYSROOT "${RISCV_ROOT_PATH}/sysroot")

set(CMAKE_C_COMPILER_TARGET "riscv64-unknown-linux-gnu")
Expand Down

0 comments on commit 97ff591

Please sign in to comment.