Skip to content

Commit

Permalink
[mono][jit] Add JIT support for the methods in Vector128<T> on amd64. (
Browse files Browse the repository at this point in the history
…#86546)

* [mono][jit] Add JIT support for the methods in Vector128<T> on amd64.

* Avoid emitting OP_NOT_NULL opcodes in non-LLVM mode: they are not used there, and they keep their inputs alive.

* Return true for IsSupported for non-128-bit types even if the operations are not supported.

* Fix Windows support.

* Add support for some binary operations.

* Disable Dot for integer types.

* Add support for Create.

* Fix some typos in the intrinsics tests.

* Enable As methods.

* Enable Abs/AndNot. Simplify INSERT_R8.

* Add more methods.

* Add compare methods.

* Add CreateScalar/CreateScalarUnsafe.

* Add Sqrt/Sum.

* Fix GetElement/WithElement<long>.

* Implement some of the convert methods, disable the rest.

* Fix GetElement<int>.

* Require SSE41.

* Fix OP_EXTRACT_I1.

* Fix INSERT_R8.

* Improve OP_NEGATION R4/R8.
  • Loading branch information
vargaz authored Jun 13, 2023
1 parent 9bcaff3 commit ac2d3fb
Show file tree
Hide file tree
Showing 8 changed files with 506 additions and 106 deletions.
26 changes: 24 additions & 2 deletions src/mono/mono/arch/amd64/amd64-codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -776,6 +776,13 @@ typedef union {
amd64_codegen_post(inst); \
} while (0)

/*
 * Emit an SSE register-register instruction through
 * emit_sse_reg_reg_op4_size (four opcode-byte arguments op1..op4 plus an
 * operand size argument), then append imm via x86_imm_emit8.
 * The whole sequence is bracketed by amd64_codegen_pre/amd64_codegen_post.
 */
#define emit_sse_reg_reg_op4_size_imm(inst,dreg,reg,op1,op2,op3,op4,size,imm) do { \
amd64_codegen_pre(inst); \
emit_sse_reg_reg_op4_size ((inst), (dreg), (reg), (op1), (op2), (op3), (op4), (size)); \
x86_imm_emit8 ((inst), (imm)); \
amd64_codegen_post(inst); \
} while (0)

/* specific SSE opcode defines */

#define amd64_sse_xorpd_reg_reg(inst,dreg,reg) emit_sse_reg_reg ((inst),(dreg),(reg), 0x66, 0x0f, 0x57)
Expand Down Expand Up @@ -836,11 +843,16 @@ typedef union {

#define amd64_sse_sqrtsd_reg_reg(inst,dreg,reg) emit_sse_reg_reg((inst), (dreg), (reg), 0xf2, 0x0f, 0x51)


/*
 * Element-insert / blend helpers. Each forwards (dreg), (reg) and a trailing
 * imm argument to a register-register emitter together with its opcode bytes.
 * - pinsrq uses the same opcode bytes as pinsrd (0x66 0x0f 0x3a 0x22) and
 *   differs only in going through the _size_imm emitter with size 8.
 * - pinsrw goes through emit_sse_reg_reg_imm with three opcode bytes; the
 *   others use the four-opcode-byte op4 emitters.
 */
#define amd64_sse_pinsrb_reg_reg_imm(inst,dreg,reg,imm) emit_sse_reg_reg_op4_imm ((inst), (dreg), (reg), 0x66, 0x0f, 0x3a, 0x20, (imm))
#define amd64_sse_pinsrd_reg_reg_imm(inst,dreg,reg,imm) emit_sse_reg_reg_op4_imm ((inst), (dreg), (reg), 0x66, 0x0f, 0x3a, 0x22, (imm))
#define amd64_sse_pinsrq_reg_reg_imm(inst,dreg,reg,imm) emit_sse_reg_reg_op4_size_imm ((inst), (dreg), (reg), 0x66, 0x0f, 0x3a, 0x22, 8, (imm))
#define amd64_sse_pinsrw_reg_reg_imm(inst,dreg,reg,imm) emit_sse_reg_reg_imm ((inst), (dreg), (reg), 0x66, 0x0f, 0xc4, (imm))
#define amd64_sse_insertps_reg_reg(inst,dreg,reg,imm) emit_sse_reg_reg_op4_imm ((inst), (dreg), (reg), 0x66, 0x0f, 0x3a, 0x21, (imm))
#define amd64_sse_pblendw_reg_reg_imm(inst,dreg,reg,imm) emit_sse_reg_reg_op4_imm ((inst), (dreg), (reg), 0x66, 0x0f, 0x3a, 0x0e, (imm))

/*
 * Element-extract helpers taking a trailing imm argument.
 * NOTE: unlike pextrw, the pextrb/pextrd wrappers hand (reg) and (dreg) to
 * the emitter in swapped order. Presumably this is because these encodings
 * place the destination in the ModRM r/m field and the source vector register
 * in the reg field -- confirm against emit_sse_reg_reg_op4_imm.
 */
#define amd64_sse_pextrw_reg_reg_imm(inst,dreg,reg,imm) emit_sse_reg_reg_imm ((inst), (dreg), (reg), 0x66, 0x0f, 0xc5, (imm))

#define amd64_sse_pextrb_reg_reg_imm(inst,dreg,reg,imm) emit_sse_reg_reg_op4_imm ((inst), (reg), (dreg), 0x66, 0x0f, 0x3a, 0x14, (imm))
#define amd64_sse_pextrd_reg_reg_imm(inst,dreg,reg,imm) emit_sse_reg_reg_op4_imm ((inst), (reg), (dreg), 0x66, 0x0f, 0x3a, 0x16, (imm))

#define amd64_sse_cvttsd2si_reg_xreg_size(inst,reg,xreg,size) emit_sse_reg_reg_size ((inst), (reg), (xreg), 0xf2, 0x0f, 0x2c, (size))

Expand Down Expand Up @@ -894,6 +906,8 @@ typedef union {

#define amd64_sse_shufpd_reg_reg_imm(inst,dreg,reg,imm) emit_sse_reg_reg_imm((inst), (dreg), (reg), 0x66, 0x0f, 0xC6, (imm))

/*
 * Packed rounding helpers. roundps and roundpd share the 0x66 0x0f 0x3a
 * leading bytes and differ only in the final opcode byte (0x08 vs 0x09).
 * imm is forwarded unchanged to the emitter (presumably the rounding-control
 * byte -- confirm against the instruction reference).
 */
#define amd64_sse_roundps_reg_reg_imm(inst, dreg, reg, imm) emit_sse_reg_reg_op4_imm((inst), (dreg), (reg), 0x66, 0x0f, 0x3a, 0x08, (imm))

#define amd64_sse_roundpd_reg_reg_imm(inst, dreg, reg, imm) emit_sse_reg_reg_op4_imm((inst), (dreg), (reg), 0x66, 0x0f, 0x3a, 0x09, (imm))

#define amd64_sse_addpd_reg_reg(inst,dreg,reg) emit_sse_reg_reg((inst), (dreg), (reg), 0x66, 0x0f, 0x58)
Expand Down Expand Up @@ -1169,6 +1183,14 @@ typedef union {
/* lzcnt/popcnt: sized register-register forms; size is forwarded to emit_sse_reg_reg_size. */
#define amd64_sse_lzcnt_reg_reg_size(inst, dreg, reg, size) emit_sse_reg_reg_size((inst), (dreg), (reg), 0xf3, 0x0f, 0xbd, (size))
#define amd64_sse_popcnt_reg_reg_size(inst, dreg, reg, size) emit_sse_reg_reg_size((inst), (dreg), (reg), 0xf3, 0x0f, 0xb8, (size))

/*
 * Additional SSE helpers.
 * - psrlq_reg_imm passes the constant X86_SSE_SHR in the dreg position of
 *   emit_sse_reg_reg_imm instead of a register (presumably the opcode
 *   extension selecting the shift-right-by-immediate form -- confirm in
 *   x86-codegen.h); (reg) is the only register operand.
 * - dpps/dppd forward `mask`, and blendpd forwards `imm`, as the trailing
 *   immediate argument; phaddw/phaddd take no immediate.
 * - amd64_movq_reg_reg is a plain three-opcode-byte register-register form
 *   (0xf3 0x0f 0x7e).
 */
#define amd64_sse_psrlq_reg_imm(inst, reg, imm) emit_sse_reg_reg_imm((inst), X86_SSE_SHR, (reg), 0x66, 0x0f, 0x73, (imm))
#define amd64_sse_dpps_reg_reg(inst, dreg, sreg, mask) emit_sse_reg_reg_op4_imm((inst), (dreg), (sreg), 0x66, 0x0f, 0x3a, 0x40, (mask))
#define amd64_sse_dppd_reg_reg(inst, dreg, sreg, mask) emit_sse_reg_reg_op4_imm((inst), (dreg), (sreg), 0x66, 0x0f, 0x3a, 0x41, (mask))
#define amd64_sse_phaddw_reg_reg(inst, dreg, sreg) emit_sse_reg_reg_op4((inst), (dreg), (sreg), 0x66, 0x0f, 0x38, 0x01)
#define amd64_sse_phaddd_reg_reg(inst, dreg, sreg) emit_sse_reg_reg_op4((inst), (dreg), (sreg), 0x66, 0x0f, 0x38, 0x02)
#define amd64_sse_blendpd_reg_reg(inst,dreg,sreg,imm) emit_sse_reg_reg_op4_imm((inst), (dreg), (sreg), 0x66, 0x0f, 0x3a, 0x0d, (imm))
#define amd64_movq_reg_reg(inst,dreg,sreg) emit_sse_reg_reg ((inst), (dreg), (sreg), 0xf3, 0x0f, 0x7e)

/* Generated from x86-codegen.h */

#define amd64_breakpoint_size(inst,size) do { x86_breakpoint(inst); } while (0)
Expand Down
24 changes: 22 additions & 2 deletions src/mono/mono/mini/cpu-amd64.mdesc
Original file line number Diff line number Diff line change
Expand Up @@ -600,6 +600,15 @@ loadu2_mem: dest:i len:16

#SIMD

# Vector opcodes whose operands are all of register class x.
# NOTE(review): clob:1 presumably means the destination must be the same
# register as src1 (two-operand x86 form), and len is presumably the maximum
# number of code bytes emitted for the opcode -- confirm against the .mdesc
# format description before relying on either.
xbinop: dest:x src1:x src2:x len:7 clob:1
xbinop_forceint: dest:x src1:x src2:x len:7 clob:1
ones_complement: dest:x src1:x len:16 clob:1
negate: dest:x src1:x len:24 clob:1
xlower: dest:x src1:x len:16
xupper: dest:x src1:x len:16
convert_fp_to_si: dest:x src1:x len:16
convert_si_to_fp: dest:x src1:x len:16

addps: dest:x src1:x src2:x len:4 clob:1
divps: dest:x src1:x src2:x len:4 clob:1
mulps: dest:x src1:x src2:x len:4 clob:1
Expand Down Expand Up @@ -772,17 +781,23 @@ xones: dest:x len:5
xconst: dest:x len:12

iconv_to_x: dest:x src1:i len:5
extract_i4: dest:i src1:x len:5

extract_i4: dest:i src1:x len:16
extract_i8: dest:i src1:x len:9

extract_i2: dest:i src1:x len:13
extract_i1: dest:i src1:x len:13
extract_r8: dest:f src1:x len:5
extract_r4: dest:f src1:x len:24
xextract: dest:i src1:x len:24

iconv_to_r4_raw: dest:f src1:i len:10

insert_i1: dest:x src1:x src2:i len:7 clob:1
insert_i2: dest:x src1:x src2:i len:6 clob:1
insert_i4: dest:x src1:x src2:i len:7 clob:1
insert_i8: dest:x src1:x src2:i len:7 clob:1
insert_r4: dest:x src1:x src2:f len:7 clob:1
insert_r8: dest:x src1:x src2:f len:24 clob:1

extractx_u2: dest:i src1:x len:6
insertx_u1_slow: dest:x src1:i src2:i len:18 clob:x
Expand Down Expand Up @@ -810,6 +825,11 @@ expand_i4: dest:x src1:i len:11
expand_i8: dest:x src1:i len:11
expand_r4: dest:x src1:f len:16
expand_r8: dest:x src1:f len:13
xop_x_x_x: dest:x src1:x src2:x len:16 clob:1
xop_x_x: dest:x src1:x len:16 clob:1
sse41_dpps_imm: dest:x src1:x src2:x len:7 clob:1
sse41_dppd_imm: dest:x src1:x src2:x len:7 clob:1
vector_andnot: dest:x src1:x src2:x len:7 clob:1

roundp: dest:x src1:x len:10

Expand Down
6 changes: 3 additions & 3 deletions src/mono/mono/mini/ir-emit.h
Original file line number Diff line number Diff line change
Expand Up @@ -886,7 +886,7 @@ static int ccount = 0;
cfg->flags |= MONO_CFG_HAS_CHECK_THIS; \
MONO_EMIT_NEW_BIALU_IMM (cfg, OP_COMPARE_IMM, -1, (reg), 0); \
MONO_EMIT_NEW_COND_EXC (cfg, EQ, "NullReferenceException"); \
MONO_EMIT_NEW_UNALU (cfg, OP_NOT_NULL, -1, reg); \
if (COMPILE_LLVM (cfg)) MONO_EMIT_NEW_UNALU (cfg, OP_NOT_NULL, -1, reg); \
} while (0)

/* Emit an explicit null check which doesn't depend on SIGSEGV signal handling */
Expand All @@ -897,7 +897,7 @@ static int ccount = 0;
} else { \
MONO_EMIT_NEW_IMPLICIT_EXCEPTION_LOAD_STORE (cfg); \
} \
MONO_EMIT_NEW_UNALU (cfg, OP_NOT_NULL, -1, reg); \
if (COMPILE_LLVM (cfg)) MONO_EMIT_NEW_UNALU (cfg, OP_NOT_NULL, -1, reg); \
} while (0)

#define MONO_EMIT_NEW_CHECK_THIS(cfg, sreg) do { \
Expand All @@ -907,7 +907,7 @@ static int ccount = 0;
} else { \
MONO_EMIT_NEW_UNALU (cfg, OP_CHECK_THIS, -1, sreg); \
MONO_EMIT_NEW_IMPLICIT_EXCEPTION_LOAD_STORE (cfg); \
MONO_EMIT_NEW_UNALU (cfg, OP_NOT_NULL, -1, sreg); \
if (COMPILE_LLVM (cfg)) MONO_EMIT_NEW_UNALU (cfg, OP_NOT_NULL, -1, sreg); \
} \
} while (0)

Expand Down
Loading

0 comments on commit ac2d3fb

Please sign in to comment.