diff --git a/src/layer/reduction.cpp b/src/layer/reduction.cpp
index 4d4f7fb578b..55648f8eaf1 100644
--- a/src/layer/reduction.cpp
+++ b/src/layer/reduction.cpp
@@ -1064,7 +1064,11 @@ struct post_process_sqrt
 {
     T operator()(const T& x) const
     {
-        return static_cast<T>(sqrtf(x));
+        // math optimization will probably generate rsqrt
+        // that produce -inf on sse with subnormal input
+        // flush subnormal input to zero as a workaround
+        // TODO explicit use simd sqrt like unaryop --- nihui
+        return static_cast<T>(sqrtf(x < FLT_MIN ? 0.f : x));
     }
 };
 