Skip to content

Commit

Permalink
[mono][jit] Arm64 SIMD regs are now zeroed with movi instead of eor (#92882)
Browse files Browse the repository at this point in the history

* SIMD regs are now zeroed with movi instead of eor.

* Simplified vector length selection.
  • Loading branch information
jandupej authored Oct 3, 2023
1 parent 3b0cd57 commit 7abea9e
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 11 deletions.
1 change: 1 addition & 0 deletions src/mono/mono/arch/arm64/arm64-codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -2289,6 +2289,7 @@ arm_encode_arith_imm (int imm, guint32 *shift)

/*
 * NEON :: modified immediate
 *
 * Builds one "modified immediate" instruction word and hands it to
 * arm_neon_opcode_1reg together with P, Q and RD.
 *   OP    is shifted to bit 29
 *   CMODE is shifted to bit 12
 *   O2    is shifted to bit 11
 *   IMM   bits 7:5 are placed at instruction bits 18:16 and
 *         bits 4:0 at instruction bits 9:5; higher bits are masked off.
 * Every argument is parenthesized so that compound expressions such as
 * `a | b` keep their meaning. IMM is expanded twice, so it must not have
 * side effects.
 */
#define arm_neon_mimm_opcode(p, q, op, cmode, o2, imm, rd) arm_neon_opcode_1reg ((p), (q), 0b00001111000000000000010000000000 | ((op) << 29) | ((cmode) << 12) | ((o2) << 11) | (((imm) & 0b11100000) << 11) | (((imm) & 0b11111) << 5), (rd))
/*
 * MOVI, byte variant: modified-immediate form with op = 0, cmode = 0b1110
 * and o2 = 0. WIDTH is forwarded as the Q argument of arm_neon_mimm_opcode.
 * Per the Arm ARM this form copies the 8-bit IMM into each byte of RD, so
 * IMM = 0 clears the register and IMM = 0xff sets every bit.
 * IMM and RD are parenthesized before forwarding so compound expressions
 * are encoded as a single value.
 */
#define arm_neon_movi_b(p, width, rd, imm) arm_neon_mimm_opcode ((p), (width), 0, 0b1110, 0, (imm), (rd))

/* 8-bit immediate intended for arm_neon_fmov_imm. NOTE(review): by its name this is presumably the imm8 encoding of floating-point 1.0 -- confirm against the Arm ARM. */
#define ARM_IMM_FONE (0x70)
/* Modified-immediate form with cmode = 0b1111 and o2 = 0; TYPE is forwarded as the op argument and WIDTH as the Q argument of arm_neon_mimm_opcode. */
#define arm_neon_fmov_imm(p, width, type, rd, imm) \
	arm_neon_mimm_opcode ((p), (width), (type), 0xf, 0x0, (imm), (rd))
Expand Down
18 changes: 7 additions & 11 deletions src/mono/mono/mini/mini-arm64.c
Original file line number Diff line number Diff line change
Expand Up @@ -3932,14 +3932,10 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
break;

case OP_XZERO:
if (ins->klass && mono_class_value_size (ins->klass, NULL) == 8)
arm_neon_eor_8b (code, dreg, dreg, dreg);
else
arm_neon_eor_16b (code, dreg, dreg, dreg);
arm_neon_movi_b (code, get_vector_size_macro (ins), dreg, 0);
break;
case OP_XONES:
arm_neon_eor_16b (code, dreg, dreg, dreg);
arm_neon_not_16b (code, dreg, dreg);
arm_neon_movi_b (code, get_vector_size_macro (ins), dreg, 0xff);
break;
case OP_XEXTRACT:
code = emit_xextract (code, (ins->inst_c1 == 8) ? VREG_LOW : VREG_FULL, GTMREG_TO_INT (ins->inst_c0), dreg, sreg1);
Expand Down Expand Up @@ -4133,7 +4129,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
}
case OP_CREATE_SCALAR_INT: {
const int t = get_type_size_macro (ins->inst_c1);
arm_neon_eor_16b (code, dreg, dreg, dreg);
arm_neon_movi_b (code, VREG_FULL, dreg, 0);
arm_neon_ins_g(code, t, dreg, sreg1, 0);
break;
}
Expand All @@ -4148,7 +4144,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
break;
}
// Use a temp register for zero op, as sreg1 and dreg share the same register here
arm_neon_eor_16b (code, NEON_TMP_REG, NEON_TMP_REG, NEON_TMP_REG);
arm_neon_movi_b (code, VREG_FULL, NEON_TMP_REG, 0);
arm_neon_ins_e(code, t, NEON_TMP_REG, sreg1, 0, 0);
arm_neon_mov (code, dreg, NEON_TMP_REG);
break;
Expand Down Expand Up @@ -4183,17 +4179,17 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
case OP_XLOWER: {
if (dreg == sreg1) {
// clean the upper half
arm_neon_eor (code, VREG_FULL, NEON_TMP_REG, NEON_TMP_REG, NEON_TMP_REG);
arm_neon_movi_b (code, VREG_FULL, NEON_TMP_REG, 0);
arm_neon_ins_e (code, SIZE_8, dreg, NEON_TMP_REG, 1, 0);
} else {
arm_neon_eor (code, VREG_FULL, dreg, dreg, dreg);
arm_neon_movi_b (code, VREG_FULL, dreg, 0);
arm_neon_mov_8b (code, dreg, sreg1);
}
break;
}
case OP_XUPPER:
// shift in 64 zeros from the left
arm_neon_eor (code, VREG_FULL, NEON_TMP_REG, NEON_TMP_REG, NEON_TMP_REG);
arm_neon_movi_b (code, VREG_FULL, NEON_TMP_REG, 0);
arm_neon_ext_16b (code, dreg, sreg1, NEON_TMP_REG, 8);
break;

Expand Down
1 change: 1 addition & 0 deletions src/mono/mono/mini/simd-intrinsics.c
Original file line number Diff line number Diff line change
Expand Up @@ -1291,6 +1291,7 @@ static MonoInst*
emit_msb_vector_mask (MonoCompile *cfg, MonoClass *arg_class, MonoTypeEnum arg_type)
{
guint64 msb_mask_value[2];
// TODO: with mini, one can emit movi to achieve broadcasting immediate i8/i16/i32

switch (arg_type) {
case MONO_TYPE_I1:
Expand Down

0 comments on commit 7abea9e

Please sign in to comment.