diff --git a/src/mono/mono/mini/cpu-arm64.mdesc b/src/mono/mono/mini/cpu-arm64.mdesc index 2479dd0d22d62..0f8a3a2d23455 100644 --- a/src/mono/mono/mini/cpu-arm64.mdesc +++ b/src/mono/mono/mini/cpu-arm64.mdesc @@ -553,8 +553,6 @@ arm64_xtn2: dest:x src1:x src2:x len:4 clob:1 arm64_fcvtn: dest:x src1:x len:4 arm64_fcvtn2: dest:x src1:x src2:x len:4 clob:1 xunop: dest:x src1:x len:4 -arm64_ushl: dest:x src1:x src2:x len:4 -arm64_ext_imm: dest:x src1:x src2:x len:4 xinsert_i8: dest:x src1:x src2:i src3:i len:20 xinsert_r8: dest:x src1:x src2:f src3:i len:20 arm64_broadcast_elem: dest:x src1:x len:16 diff --git a/src/mono/mono/mini/mini-arm64.c b/src/mono/mono/mini/mini-arm64.c index 12dd911d7683f..528abe9412e5e 100644 --- a/src/mono/mono/mini/mini-arm64.c +++ b/src/mono/mono/mini/mini-arm64.c @@ -3870,6 +3870,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case INTRINS_AARCH64_ADV_SIMD_TBL1: arm_neon_tbl1_16b (code, dreg, sreg1, sreg2); break; + case INTRINS_AARCH64_ADV_SIMD_USHL: + arm_neon_ushl (code, get_vector_size_macro (ins), get_type_size_macro (ins->inst_c1), dreg, sreg1, sreg2); + break; default: g_assert_not_reached (); break; @@ -4138,17 +4141,6 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) arm_neon_ins_e (code, SIZE_8, dreg, sreg2, 1, 0); break; } - case OP_ARM64_USHL: { - arm_neon_ushl (code, get_vector_size_macro (ins), get_type_size_macro (ins->inst_c1), dreg, sreg1, sreg2); - break; - } - case OP_ARM64_EXT_IMM: { - if (get_vector_size_macro (ins) == VREG_LOW) - arm_neon_ext_8b (code, dreg, sreg1, sreg2, ins->inst_c0); - else - arm_neon_ext_16b (code, dreg, sreg1, sreg2, ins->inst_c0); - break; - } case OP_XLOWER: { if (dreg == sreg1) { // clean the upper half diff --git a/src/mono/mono/mini/mini-ops.h b/src/mono/mono/mini/mini-ops.h index d39c0d15183ba..d95f094807340 100644 --- a/src/mono/mono/mini/mini-ops.h +++ b/src/mono/mono/mini/mini-ops.h @@ -1792,7 +1792,6 @@ MINI_OP(OP_ARM64_ABSCOMPARE, "arm64_abscompare", XREG, XREG, XREG) MINI_OP(OP_ARM64_XNARROW_SCALAR, "arm64_xnarrow_scalar", XREG, XREG, NONE) MINI_OP3(OP_ARM64_EXT, "arm64_ext", XREG, XREG, XREG, IREG) -MINI_OP(OP_ARM64_EXT_IMM, "arm64_ext_imm", XREG, XREG, XREG) MINI_OP3(OP_ARM64_SQRDMLAH, "arm64_sqrdmlah", XREG, XREG, XREG, XREG) MINI_OP3(OP_ARM64_SQRDMLAH_BYSCALAR, "arm64_sqrdmlah_byscalar", XREG, XREG, XREG, XREG) @@ -1809,8 +1808,6 @@ MINI_OP3(OP_ARM64_SQRDMLSH_SCALAR, "arm64_sqrdmlsh_scalar", XREG, XREG, XREG, XR MINI_OP(OP_ARM64_TBL_INDIRECT, "arm64_tbl_indirect", XREG, IREG, XREG) MINI_OP3(OP_ARM64_TBX_INDIRECT, "arm64_tbx_indirect", XREG, IREG, XREG, XREG) -MINI_OP(OP_ARM64_USHL, "arm64_ushl", XREG, XREG, XREG) - #endif // TARGET_ARM64 MINI_OP(OP_SIMD_FCVTL, "simd_convert_to_higher_precision", XREG, XREG, NONE) diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index 536e0e135d8c1..011dee3363a0c 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -1805,9 +1805,6 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi #ifdef TARGET_WASM return emit_simd_ins_for_sig (cfg, klass, OP_WASM_SIMD_BITMASK, -1, -1, fsig, args); #elif defined(TARGET_ARM64) - if (COMPILE_LLVM (cfg)) - return NULL; - MonoInst* result_ins = NULL; MonoClass* arg_class = mono_class_from_mono_type_internal (fsig->params [0]); int size = mono_class_value_size (arg_class, NULL); @@ -1819,27 +1816,22 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi and_res_vec->sreg2 = msb_mask_vec->dreg; MonoInst* msb_shift_vec = emit_msb_shift_vector_constant (cfg, arg_class, arg0_type); - MonoInst* shift_res_vec = emit_simd_ins (cfg, arg_class, OP_ARM64_USHL, and_res_vec->dreg, msb_shift_vec->dreg); + + MonoInst* shift_res_vec = emit_simd_ins (cfg, arg_class, OP_XOP_OVR_X_X_X, and_res_vec->dreg, msb_shift_vec->dreg); + shift_res_vec->inst_c0 = INTRINS_AARCH64_ADV_SIMD_USHL; shift_res_vec->inst_c1 = arg0_type; if (arg0_type == MONO_TYPE_I1 || arg0_type == MONO_TYPE_U1) { - // Always perform usigned operations as vector sum and extract operations could sign-extend the result into the GP register + // Always perform unsigned operations as vector sum and extract operations could sign-extend the result into the GP register // making the final result invalid. This is not needed for wider type as the maximum sum of extracted MSB cannot be larger than 8bits arg0_type = MONO_TYPE_U1; - // In order to sum high and low 64bits of the shifted vector separatly, we use a zeroed vector and the extract operation - MonoInst* zero_vec = emit_xzero(cfg, arg_class); - - MonoInst* ext_low_vec = emit_simd_ins (cfg, arg_class, OP_ARM64_EXT_IMM, zero_vec->dreg, shift_res_vec->dreg); - ext_low_vec->inst_c0 = 8; - ext_low_vec->inst_c1 = arg0_type; + MonoInst* ext_low_vec = emit_simd_ins_for_sig (cfg, arg_class, OP_XLOWER, 8, arg0_type, fsig, &shift_res_vec); MonoInst* sum_low_vec = emit_sum_vector (cfg, fsig->params [0], arg0_type, ext_low_vec); - - MonoInst* ext_high_vec = emit_simd_ins (cfg, arg_class, OP_ARM64_EXT_IMM, shift_res_vec->dreg, zero_vec->dreg); - ext_high_vec->inst_c0 = 8; - ext_high_vec->inst_c1 = arg0_type; - MonoInst* sum_high_vec = emit_sum_vector (cfg, fsig->params [0], arg0_type, ext_high_vec); - + + MonoInst* ext_high_vec = emit_simd_ins_for_sig (cfg, arg_class, OP_XUPPER, 8, arg0_type, fsig, &shift_res_vec); + MonoInst* sum_high_vec = emit_sum_vector (cfg, fsig->params [0], arg0_type, ext_high_vec); + MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SHL_IMM, sum_high_vec->dreg, sum_high_vec->dreg, 8); EMIT_NEW_BIALU (cfg, result_ins, OP_IOR, sum_high_vec->dreg, sum_high_vec->dreg, sum_low_vec->dreg); } else {