From 8abc0337db08c787e5d461e3c8a2619a0cfc1133 Mon Sep 17 00:00:00 2001 From: TIHan Date: Wed, 17 Jan 2024 15:06:04 -0800 Subject: [PATCH 01/17] Added SVE_JD_4A format --- src/coreclr/jit/codegenarm64test.cpp | 16 ++++ src/coreclr/jit/emitarm64.cpp | 107 +++++++++++++++++++++++++++ src/coreclr/jit/emitarm64.h | 4 + 3 files changed, 127 insertions(+) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 162cf751c37de..3e26db493b75d 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5811,6 +5811,22 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R_I(INS_sve_st4w, EA_SCALABLE, REG_V31, REG_P1, REG_R5, 28, INS_OPTS_SCALABLE_S); // ST4W {.S, .S, .S, .S }, , [{, // #, MUL VL}] + + // IF_SVE_JD_4A + theEmitter->emitIns_R_R_R_R(INS_sve_st1b, EA_SCALABLE, REG_V4, REG_P1, REG_R2, REG_R0, + INS_OPTS_SCALABLE_B); // ST1B {.}, , [, ] + theEmitter->emitIns_R_R_R_R(INS_sve_st1b, EA_SCALABLE, REG_V4, REG_P5, REG_R6, REG_R2, + INS_OPTS_SCALABLE_H); // ST1B {.}, , [, ] + theEmitter->emitIns_R_R_R_R(INS_sve_st1b, EA_SCALABLE, REG_V6, REG_P5, REG_R7, REG_R4, + INS_OPTS_SCALABLE_S); // ST1B {.}, , [, ] + theEmitter->emitIns_R_R_R_R(INS_sve_st1b, EA_SCALABLE, REG_V4, REG_P0, REG_R1, REG_R2, + INS_OPTS_SCALABLE_D); // ST1B {.}, , [, ] + theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V5, REG_P6, REG_R1, REG_R2, + INS_OPTS_SCALABLE_H); // ST1H {.}, , [, , LSL #1] + theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V1, REG_P2, REG_R3, REG_R4, + INS_OPTS_SCALABLE_S); // ST1H {.}, , [, , LSL #1] + theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V3, REG_P2, REG_R4, REG_R0, + INS_OPTS_SCALABLE_D); // ST1H {.}, , [, , LSL #1] } #endif // defined(TARGET_ARM64) && defined(DEBUG) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index be18133b3d78c..866e3ea279ba3 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1454,6 +1454,22 @@ void emitter::emitInsSanityCheck(instrDesc* id) } break; + case IF_SVE_JD_4A: // .........xxmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) + elemsize = id->idOpSize(); + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isGeneralRegister(id->idReg4())); // mmmmm + assert(isScalableVectorSize(elemsize)); // xx +#ifdef DEBUG + if ((id->idIns() == INS_sve_st1h) && (id->idInsOpt() == INS_OPTS_SCALABLE_B)) + { + assert(!"sve_st1h with scalable B is reserved"); + } +#endif // DEBUG + break; + default: printf("unexpected format %s\n", emitIfName(id->idInsFmt())); assert(!"Unexpected format"); @@ -10912,6 +10928,23 @@ void emitter::emitIns_R_R_R_R(instruction ins, fmt = IF_SVE_AS_4A; break; + case INS_sve_st1b: + case INS_sve_st1h: + assert(insOptsScalableStandard(opt)); + assert(isVectorRegister(reg1)); // ttttt + assert(isPredicateRegister(reg2)); // ggg + assert(isGeneralRegister(reg3)); // nnnnn + assert(isGeneralRegister(reg4)); // mmmmm + assert(isScalableVectorSize(size)); // xx +#ifdef DEBUG + if ((ins == INS_sve_st1h) && (opt == INS_OPTS_SCALABLE_B)) + { + assert(!"sve_st1h with scalable B is reserved"); + } +#endif // DEBUG + fmt = IF_SVE_JD_4A; + break; + default: unreached(); break; @@ -13740,6 +13773,34 @@ void emitter::emitIns_Call(EmitCallType callType, return 0; } +/***************************************************************************** + * + * Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 Sve vector instruction + * This specifically encodes the size at bit locations '22-21'. + */ + +/*static*/ emitter::code_t emitter::insEncodeSveElemsize_22_to_21(emitAttr size) +{ + switch (size) + { + case EA_1BYTE: + return 0; + + case EA_2BYTE: + return (1 << 21); // set the bit at location 21 + + case EA_4BYTE: + return (1 << 22); // set the bit at location 22 + + case EA_8BYTE: + return (1 << 22) | (1 << 21); // set the bit at location 22 and 21 + + default: + assert(!"Invalid insOpt for vector register"); + } + return 0; +} + /***************************************************************************** * * Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 Sve vector instruction @@ -13850,6 +13911,7 @@ void emitter::emitIns_Call(EmitCallType callType, case INS_sve_ldff1d: case INS_sve_ldff1sw: case INS_sve_st1b: + case INS_sve_st1h: case INS_sve_ldff1sb: case INS_sve_ldff1b: case INS_sve_ldnt1sb: @@ -16688,6 +16750,16 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutput_Instr(dst, code); break; + case IF_SVE_JD_4A: // .........xxmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ttttt + code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg + code |= insEncodeReg_R_9_to_5(id->idReg3()); // nnnnn + code |= insEncodeReg_R_20_to_16(id->idReg4()); // mmmmm + code |= insEncodeSveElemsize_22_to_21(optGetSveElemsize(id->idInsOpt())); // xx + dst += emitOutput_Instr(dst, code); + break; + default: assert(!"Unexpected format"); break; @@ -19354,6 +19426,23 @@ void emitter::emitDispInsHelp( printf("]"); break; + case IF_SVE_JD_4A: // .........xxmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) + emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt + emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, id->idInsOpt(), true); // ggg + printf("["); + emitDispReg(id->idReg3(), EA_8BYTE, true); // nnnnn + if (ins == INS_sve_st1h) + { + emitDispReg(id->idReg4(), EA_8BYTE, true); // mmmmm + printf("LSL #1]"); + } + else + { + emitDispReg(id->idReg4(), EA_8BYTE, false); // mmmmm + printf("]"); + } + break; + default: printf("unexpected format %s", emitIfName(id->idInsFmt())); assert(!"unexpectedFormat"); @@ -22184,6 +22273,24 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins } break; + case IF_SVE_JD_4A: // .........xxmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) + switch (ins) + { + case INS_sve_st1b: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + case INS_sve_st1h: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + default: // all other instructions perfScoreUnhandledInstruction(id, &result); diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index ef77505f58d43..aa37ed72880c8 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -483,6 +483,10 @@ static code_t insEncodeReg3Scale(bool isScaled); // Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 SVE vector instruction static code_t insEncodeSveElemsize(emitAttr size); +// Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 Sve vector instruction +// This specifically encodes the size at bit locations '22-21'. +static code_t insEncodeSveElemsize_22_to_21(emitAttr size); + // Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 SVE vector instruction // This specifically encodes the field 'tszh:tszl' at bit locations '22:20-19'. static code_t insEncodeSveElemsize_tszh_22_tszl_20_to_19(emitAttr size); From 3bf2e56e7e99ac16e68f7402649797b84c06177d Mon Sep 17 00:00:00 2001 From: TIHan Date: Wed, 17 Jan 2024 15:30:09 -0800 Subject: [PATCH 02/17] Added SVE_JD_4B format --- src/coreclr/jit/codegenarm64test.cpp | 6 +++ src/coreclr/jit/emitarm64.cpp | 75 ++++++++++++++++++++++++++++ src/coreclr/jit/emitarm64.h | 4 ++ 3 files changed, 85 insertions(+) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 3e26db493b75d..de3edaed106b6 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5827,6 +5827,12 @@ void CodeGen::genArm64EmitterUnitTestsSve() INS_OPTS_SCALABLE_S); // ST1H {.}, , [, , LSL #1] theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V3, REG_P2, REG_R4, REG_R0, INS_OPTS_SCALABLE_D); // ST1H {.}, , [, , LSL #1] + + // IF_SVE_JD_4B + theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V0, REG_P1, REG_R2, REG_R3, + INS_OPTS_SCALABLE_S); // ST1W {.}, , [, , LSL #2] + theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V2, REG_P3, REG_R4, REG_R5, + INS_OPTS_SCALABLE_D); // ST1W {.}, , [, , LSL #2] } #endif // defined(TARGET_ARM64) && defined(DEBUG) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 866e3ea279ba3..5ecd7c8dd9e83 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1470,6 +1470,16 @@ void emitter::emitInsSanityCheck(instrDesc* id) #endif // DEBUG break; + case IF_SVE_JD_4B: // ..........xmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) + elemsize = id->idOpSize(); + assert(insOptsScalableWords(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isGeneralRegister(id->idReg4())); // mmmmm + assert(isScalableVectorSize(elemsize)); // x + break; + default: printf("unexpected format %s\n", emitIfName(id->idInsFmt())); assert(!"Unexpected format"); @@ -10945,6 +10955,16 @@ void emitter::emitIns_R_R_R_R(instruction ins, fmt = IF_SVE_JD_4A; break; + case INS_sve_st1w: + assert(insOptsScalableWords(opt)); + assert(isVectorRegister(reg1)); // ttttt + assert(isPredicateRegister(reg2)); // ggg + assert(isGeneralRegister(reg3)); // nnnnn + assert(isGeneralRegister(reg4)); // mmmmm + assert(isScalableVectorSize(size)); // x + fmt = IF_SVE_JD_4B; + break; + default: unreached(); break; @@ -13801,6 +13821,28 @@ void emitter::emitIns_Call(EmitCallType callType, return 0; } +/***************************************************************************** + * + * Returns the encoding to select the 4/8 byte elemsize for an Arm64 Sve vector instruction + * This specifically encodes the field 'sz' at bit location '21'. + */ + +/*static*/ emitter::code_t emitter::insEncodeSveElemsize_sz_21(emitAttr size) +{ + switch (size) + { + case EA_4BYTE: + return 0; + + case EA_8BYTE: + return (1 << 21); + + default: + assert(!"Invalid insOpt for vector register"); + } + return 0; +} + /***************************************************************************** * * Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 Sve vector instruction @@ -16760,6 +16802,16 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutput_Instr(dst, code); break; + case IF_SVE_JD_4B: // ..........xmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ttttt + code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg + code |= insEncodeReg_R_9_to_5(id->idReg3()); // nnnnn + code |= insEncodeReg_R_20_to_16(id->idReg4()); // mmmmm + code |= insEncodeSveElemsize_sz_21(optGetSveElemsize(id->idInsOpt())); // x + dst += emitOutput_Instr(dst, code); + break; + default: assert(!"Unexpected format"); break; @@ -19443,6 +19495,15 @@ void emitter::emitDispInsHelp( } break; + case IF_SVE_JD_4B: // ..........xmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) + emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt + emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, id->idInsOpt(), true); // ggg + printf("["); + emitDispReg(id->idReg3(), EA_8BYTE, true); // nnnnn + emitDispReg(id->idReg4(), EA_8BYTE, true); // mmmmm + printf("LSL #2]"); + break; + default: printf("unexpected format %s", emitIfName(id->idInsFmt())); assert(!"unexpectedFormat"); @@ -22291,6 +22352,20 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins } break; + case IF_SVE_JD_4B: // ..........xmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) + switch (ins) + { + case INS_sve_st1w: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + default: // all other instructions perfScoreUnhandledInstruction(id, &result); diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index aa37ed72880c8..4e32c247fa170 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -487,6 +487,10 @@ static code_t insEncodeSveElemsize(emitAttr size); // This specifically encodes the size at bit locations '22-21'. static code_t insEncodeSveElemsize_22_to_21(emitAttr size); +// Returns the encoding to select the 4/8 byte elemsize for an Arm64 Sve vector instruction +// This specifically encodes the field 'sz' at bit location '21'. +static code_t insEncodeSveElemsize_sz_21(emitAttr size); + // Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 SVE vector instruction // This specifically encodes the field 'tszh:tszl' at bit locations '22:20-19'. static code_t insEncodeSveElemsize_tszh_22_tszl_20_to_19(emitAttr size); From 89e5b82b94388d60f9ce58b262a6672ec0a3f702 Mon Sep 17 00:00:00 2001 From: TIHan Date: Wed, 17 Jan 2024 17:28:23 -0800 Subject: [PATCH 03/17] Added SVE_JJ_4A format, added more formats but they are commented out for now --- src/coreclr/jit/codegenarm64test.cpp | 52 +++ src/coreclr/jit/emitarm64.cpp | 497 ++++++++++++++++++++++++++- src/coreclr/jit/emitarm64.h | 17 + src/coreclr/jit/instr.h | 3 + 4 files changed, 555 insertions(+), 14 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index de3edaed106b6..59189af6e8ee0 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5833,6 +5833,58 @@ void CodeGen::genArm64EmitterUnitTestsSve() INS_OPTS_SCALABLE_S); // ST1W {.}, , [, , LSL #2] theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V2, REG_P3, REG_R4, REG_R5, INS_OPTS_SCALABLE_D); // ST1W {.}, , [, , LSL #2] + + // IF_SVE_JJ_4A + theEmitter->emitIns_R_R_R_R(INS_sve_st1d, EA_SCALABLE, REG_V0, REG_P1, REG_R2, REG_V3, INS_OPTS_UXTW, + INS_SCALABLE_OPTS_MOD_N); // ST1D {.D }, , [, .D, #3] + theEmitter->emitIns_R_R_R_R(INS_sve_st1d, EA_SCALABLE, REG_V0, REG_P1, REG_R2, REG_V3, INS_OPTS_SXTW, + INS_SCALABLE_OPTS_MOD_N); // ST1D {.D }, , [, .D, #3] + theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V3, REG_P1, REG_R5, REG_V4, INS_OPTS_UXTW, + INS_SCALABLE_OPTS_MOD_N); // ST1H {.S }, , [, .S, #1] + theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V3, REG_P1, REG_R5, REG_V4, INS_OPTS_SXTW, + INS_SCALABLE_OPTS_MOD_N); // ST1H {.S }, , [, .S, #1] + theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V0, REG_P3, REG_R1, REG_V2, INS_OPTS_UXTW, + INS_SCALABLE_OPTS_MOD_N); // ST1W {.S }, , [, .S, #2] + theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V0, REG_P3, REG_R1, REG_V2, INS_OPTS_SXTW, + INS_SCALABLE_OPTS_MOD_N); // ST1W {.S }, , [, .S, #2] + + //// IF_SVE_JJ_4A_B + //theEmitter->emitIns_R_R_R_R(INS_sve_st1d, EA_SCALABLE, REG_V3, REG_P1, REG_R2, REG_V5, + // INS_OPTS_SCALABLE_B); // ST1D {.D }, , [, .D, ] + //theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V2, REG_P3, REG_R1, REG_V4, + // INS_OPTS_SCALABLE_B); // ST1H {.D }, , [, .D, #1] + //theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V1, REG_P4, REG_R2, REG_V3, + // INS_OPTS_SCALABLE_B); // ST1W {.D }, , [, .D, #2] + + //// IF_SVE_JJ_4A_C + //theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V1, REG_P5, REG_R1, REG_V3, + // INS_OPTS_SCALABLE_B); // ST1H {.D }, , [, .D, ] + //theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V0, REG_P2, REG_R3, REG_V4, + // INS_OPTS_SCALABLE_B); // ST1W {.D }, , [, .D, ] + + //// IF_SVE_JJ_4A_D + //theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V7, REG_P5, REG_R4, REG_V1, + // INS_OPTS_SCALABLE_B); // ST1H {.S }, , [, .S, ] + //theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V1, REG_P2, REG_R3, REG_V2, + // INS_OPTS_SCALABLE_B); // ST1W {.S }, , [, .S, ] + + //// IF_SVE_JK_4A + //theEmitter->emitIns_R_R_R_R(INS_sve_st1b, EA_SCALABLE, REG_V4, REG_P2, REG_R0, REG_V1, + // INS_OPTS_SCALABLE_B); // ST1B {.D }, , [, .D, ] + + //// IF_SVE_JK_4A_B + //theEmitter->emitIns_R_R_R_R(INS_sve_st1b, EA_SCALABLE, REG_V1, REG_P4, REG_R3, REG_V0, + // INS_OPTS_SCALABLE_B); // ST1B {.S }, , [, .S, ] + + //// IF_SVE_JN_3A + //theEmitter->emitIns_R_R_R_I(INS_sve_st1b, EA_SCALABLE, REG_V3, REG_P2, REG_R1, 5, + // INS_OPTS_SCALABLE_B); // ST1B {.}, , [{, #, MUL VL}] + //theEmitter->emitIns_R_R_R_I(INS_sve_st1h, EA_SCALABLE, REG_V0, REG_P3, REG_R4, 5, + // INS_OPTS_SCALABLE_B); // ST1H {.}, , [{, #, MUL VL}] + + //// IF_SVE_JN_3B + //theEmitter->emitIns_R_R_R_I(INS_sve_st1w, EA_SCALABLE, REG_V2, REG_P1, REG_R3, 5, + // INS_OPTS_SCALABLE_B); // ST1W {.}, , [{, #, MUL VL}] } #endif // defined(TARGET_ARM64) && defined(DEBUG) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 5ecd7c8dd9e83..47ed623faddc4 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1480,6 +1480,46 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isScalableVectorSize(elemsize)); // x break; + case IF_SVE_JJ_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + case IF_SVE_JJ_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + case IF_SVE_JJ_4A_C: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + case IF_SVE_JJ_4A_D: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + case IF_SVE_JK_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled + // offsets) + case IF_SVE_JK_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit + // unscaled offsets) + //assert(insOptsScalable(id->idInsOpt())); + //assert(isVectorRegister(id->idReg10())); // mmmmm + //assert(isPredicateRegister(id->idReg20())); // ggg + //assert(isVectorRegister(id->idReg30())); // ttttt + //assert(isValidGeneralRegister(id->idReg40())); // nnnnn + //assert(isValidImmShift(id->idInsOpt())); // h + break; + + //case IF_SVE_JN_3A: // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + // elemsize = id->idOpSize(); + // assert(insOptsScalable(id->idInsOpt())); + // assert(isPredicateRegister(id->idReg10())); // ggg + // assert(isVectorRegister(id->idReg20())); // ttttt + // assert(isValidGeneralRegister(id->idReg30())); // nnnnn + // assert(isValidImm()); // iiii + // assert(isValidVectorElemsize(id->idInsOpt())); // xx + // break; + + //case IF_SVE_JN_3B: // ..........x.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + // elemsize = id->idOpSize(); + // assert(insOptsScalable(id->idInsOpt())); + // assert(isPredicateRegister(id->idReg10())); // ggg + // assert(isVectorRegister(id->idReg20())); // ttttt + // assert(isValidGeneralRegister(id->idReg30())); // nnnnn + // assert(isValidImm()); // iiii + // assert(isValidVectorElemsize(id->idInsOpt())); // x + // break; + default: printf("unexpected format %s\n", emitIfName(id->idInsFmt())); assert(!"Unexpected format"); @@ -10409,6 +10449,27 @@ void emitter::emitIns_R_R_R_I(instruction ins, fmt = IF_SVE_JO_3A; break; + //case INS_sve_st1b: + //case INS_sve_st1h: + // assert(insOptsScalable(id->idInsOpt())); + // assert(isPredicateRegister(reg10)); // ggg + // assert(isVectorRegister(reg20)); // ttttt + // assert(isValidGeneralRegister(reg30)); // nnnnn + // assert(isValidImm()); // iiii + // assert(isValidVectorElemsize(opt)); // xx + // fmt = IF_SVE_JN_3A; + // break; + + //case INS_sve_st1w: + // assert(insOptsScalable(id->idInsOpt())); + // assert(isPredicateRegister(reg10)); // ggg + // assert(isVectorRegister(reg20)); // ttttt + // assert(isValidGeneralRegister(reg30)); // nnnnn + // assert(isValidImm()); // iiii + // assert(isValidVectorElemsize(opt)); // x + // fmt = IF_SVE_JN_3B; + // break; + default: unreached(); break; @@ -10940,31 +11001,165 @@ void emitter::emitIns_R_R_R_R(instruction ins, case INS_sve_st1b: case INS_sve_st1h: - assert(insOptsScalableStandard(opt)); - assert(isVectorRegister(reg1)); // ttttt - assert(isPredicateRegister(reg2)); // ggg - assert(isGeneralRegister(reg3)); // nnnnn - assert(isGeneralRegister(reg4)); // mmmmm - assert(isScalableVectorSize(size)); // xx + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isScalableVectorSize(size)); #ifdef DEBUG if ((ins == INS_sve_st1h) && (opt == INS_OPTS_SCALABLE_B)) { assert(!"sve_st1h with scalable B is reserved"); } #endif // DEBUG - fmt = IF_SVE_JD_4A; + if (insOptsScalableStandard(opt)) + { + assert(isGeneralRegister(reg4)); + fmt = IF_SVE_JD_4A; + } + else + { + assert(isVectorRegister(reg4)); + switch (ins) + { + case INS_sve_st1h: + assert(insOpts32BitExtend(opt)); + if (insScalableOptsModN(sopt)) + { + fmt = IF_SVE_JJ_4A; + } + else + { + assert(insScalableOptsMod(sopt)); + fmt = IF_SVE_JJ_4A_B; + } + break; + + default: + assert(!"Invalid instruction"); + break; + } + } break; case INS_sve_st1w: - assert(insOptsScalableWords(opt)); - assert(isVectorRegister(reg1)); // ttttt - assert(isPredicateRegister(reg2)); // ggg - assert(isGeneralRegister(reg3)); // nnnnn - assert(isGeneralRegister(reg4)); // mmmmm - assert(isScalableVectorSize(size)); // x - fmt = IF_SVE_JD_4B; + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isScalableVectorSize(size)); + if (insOptsScalableStandard(opt)) + { + assert(isGeneralRegister(reg4)); + fmt = IF_SVE_JD_4B; + } + else + { + assert(isVectorRegister(reg4)); + switch (ins) + { + case INS_sve_st1w: + assert(insOpts32BitExtend(opt)); + if (insScalableOptsModN(sopt)) + { + fmt = IF_SVE_JJ_4A; + } + else + { + assert(insScalableOptsMod(sopt)); + fmt = IF_SVE_JJ_4A_B; + } + break; + + default: + assert(!"Invalid instruction"); + break; + } + } break; + case INS_sve_st1d: + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isVectorRegister(reg4)); + assert(isScalableVectorSize(size)); + assert(insOpts32BitExtend(opt)); + if (insScalableOptsModN(sopt)) + { + fmt = IF_SVE_JJ_4A; + } + else + { + assert(insScalableOptsMod(sopt)); + fmt = IF_SVE_JJ_4A_B; + } + break; + + //case INS_sve_st1h: + //case INS_sve_st1w: + //case INS_sve_st1d: + // assert(insOptsScalable(id->idInsOpt())); + // assert(isVectorRegister(reg10)); // mmmmm + // assert(isPredicateRegister(reg20)); // ggg + // assert(isVectorRegister(reg30)); // ttttt + // assert(isValidGeneralRegister(reg40)); // nnnnn + // assert(isValidImmShift(opt)); // h + // fmt = IF_SVE_JJ_4A; + // break; + + //case INS_sve_st1h: + //case INS_sve_st1w: + //case INS_sve_st1d: + // assert(insOptsScalable(id->idInsOpt())); + // assert(isVectorRegister(reg10)); // mmmmm + // assert(isPredicateRegister(reg20)); // ggg + // assert(isVectorRegister(reg30)); // ttttt + // assert(isValidGeneralRegister(reg40)); // nnnnn + // assert(isValidImmShift(opt)); // h + // fmt = IF_SVE_JJ_4A_B; + // break; + + //case INS_sve_st1h: + //case INS_sve_st1w: + // assert(insOptsScalable(id->idInsOpt())); + // assert(isVectorRegister(reg10)); // mmmmm + // assert(isPredicateRegister(reg20)); // ggg + // assert(isVectorRegister(reg30)); // ttttt + // assert(isValidGeneralRegister(reg40)); // nnnnn + // assert(isValidImmShift(opt)); // h + // fmt = IF_SVE_JJ_4A_C; + // break; + + //case INS_sve_st1h: + //case INS_sve_st1w: + // assert(insOptsScalable(id->idInsOpt())); + // assert(isVectorRegister(reg10)); // mmmmm + // assert(isPredicateRegister(reg20)); // ggg + // assert(isVectorRegister(reg30)); // ttttt + // assert(isValidGeneralRegister(reg40)); // nnnnn + // assert(isValidImmShift(opt)); // h + // fmt = IF_SVE_JJ_4A_D; + // break; + + //case INS_sve_st1b: + // assert(insOptsScalable(id->idInsOpt())); + // assert(isVectorRegister(reg10)); // mmmmm + // assert(isPredicateRegister(reg20)); // ggg + // assert(isVectorRegister(reg30)); // ttttt + // assert(isValidGeneralRegister(reg40)); // nnnnn + // assert(isValidImmShift(opt)); // h + // fmt = IF_SVE_JK_4A; + // break; + + //case INS_sve_st1b: + // assert(insOptsScalable(id->idInsOpt())); + // assert(isVectorRegister(reg10)); // mmmmm + // assert(isPredicateRegister(reg20)); // ggg + // assert(isVectorRegister(reg30)); // ttttt + // assert(isValidGeneralRegister(reg40)); // nnnnn + // assert(isValidImmShift(opt)); // h + // fmt = IF_SVE_JK_4A_B; + // break; + default: unreached(); break; @@ -16812,6 +17007,62 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutput_Instr(dst, code); break; + case IF_SVE_JJ_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ttttt + code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg + code |= insEncodeReg_R_9_to_5(id->idReg3()); // nnnnn + code |= insEncodeReg_V_20_to_16(id->idReg4()); // mmmmm + + switch (id->idInsOpt()) + { + case INS_OPTS_UXTW: + break; + + case INS_OPTS_SXTW: + code |= (1 << 14); // h + break; + + default: + assert(!"Invalid OPTS"); + break; + } + + dst += emitOutput_Instr(dst, code); + break; + //case IF_SVE_JJ_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // // offsets) + //case IF_SVE_JJ_4A_C: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // // offsets) + //case IF_SVE_JJ_4A_D: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // // offsets) + //case IF_SVE_JK_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled + // // offsets) + //case IF_SVE_JK_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit + // // unscaled offsets) + // break; + + //case IF_SVE_JN_3A: // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + // code = emitInsCodeSve(ins, fmt); + // code |= insEncodeReg_P_12_to_10(id->idReg10()); // ggg + // code |= insEncodeReg_V_4_to_0(id->idReg20()); // ttttt + // code |= insEncodeReg_R_9_to_5(id->idReg30()); // nnnnn + // code |= insEncodeImm(); // iiii + // code |= insEncodeElemsize(id->idInsOpt()); // xx + // dst += emitOutput_Instr(dst, code); + // break; + + //case IF_SVE_JN_3B: // ..........x.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + // code = emitInsCodeSve(ins, fmt); + // code |= insEncodeReg_P_12_to_10(id->idReg10()); // ggg + // code |= insEncodeReg_V_4_to_0(id->idReg20()); // ttttt + // code |= insEncodeReg_R_9_to_5(id->idReg30()); // nnnnn + // code |= insEncodeImm(); // iiii + // code |= insEncodeSveElemsize(id->idInsOpt()); // x + // dst += emitOutput_Instr(dst, code); + // break; + default: assert(!"Unexpected format"); break; @@ -19504,6 +19755,78 @@ void emitter::emitDispInsHelp( printf("LSL #2]"); break; + case IF_SVE_JJ_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + { + insOpts opt = INS_OPTS_NONE; + switch (ins) + { + case INS_sve_st1h: + case INS_sve_st1w: + opt = INS_OPTS_SCALABLE_S; + break; + + case INS_sve_st1d: + opt = INS_OPTS_SCALABLE_D; + break; + + default: + assert(!"Invalid instruction"); + break; + } + emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), opt, + true); // ttttt + emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, opt, true); // ggg + printf("["); + emitDispReg(id->idReg3(), EA_8BYTE, true); // nnnnn + emitDispSveReg(id->idReg4(), opt, true); // mmmmm + emitDispExtendOpts(id->idInsOpt()); + switch (ins) + { + case INS_sve_st1h: + printf(" #1]"); + break; + + case INS_sve_st1w: + printf(" #2]"); + break; + + case INS_sve_st1d: + printf(" #3]"); + break; + + default: + assert(!"Invalid instruction"); + break; + } + break; + } + //case IF_SVE_JJ_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // // offsets) + //case IF_SVE_JJ_4A_C: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // // offsets) + //case IF_SVE_JJ_4A_D: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // // offsets) + //case IF_SVE_JK_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled + // // offsets) + //case IF_SVE_JK_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit + // // unscaled offsets) + // break; + + //case IF_SVE_JN_3A: // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + // emitDispPredicateReg(id->idReg10(), PREDICATE_ZERO, id->idInsOpt(), true); // ggg + // emitDispSveReg(id->idReg20(), id->idInsOpt(), true); // ttttt + // emitDispReg(id->idReg30(), id->idInsOpt(), true); // nnnnn + // emitDispImm(); // iiii + // break; + + //case IF_SVE_JN_3B: // ..........x.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + // emitDispPredicateReg(id->idReg10(), PREDICATE_ZERO, id->idInsOpt(), true); // ggg + // emitDispSveReg(id->idReg20(), id->idInsOpt(), true); // ttttt + // emitDispReg(id->idReg30(), id->idInsOpt(), true); // nnnnn + // emitDispImm(); // iiii + // break; + default: printf("unexpected format %s", emitIfName(id->idInsFmt())); assert(!"unexpectedFormat"); @@ -22366,6 +22689,152 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins } break; + case IF_SVE_JJ_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + switch (ins) + { + case INS_sve_st1h: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + case INS_sve_st1w: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + case INS_sve_st1d: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_JJ_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + switch (ins) + { + case INS_sve_st1h: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + case INS_sve_st1w: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + case INS_sve_st1d: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_JJ_4A_C: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + switch (ins) + { + case INS_sve_st1h: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + case INS_sve_st1w: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_JJ_4A_D: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + switch (ins) + { + case INS_sve_st1h: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + case INS_sve_st1w: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_JK_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled + // offsets) + switch (ins) + { + case INS_sve_st1b: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_JK_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit + // unscaled offsets) + switch (ins) + { + case INS_sve_st1b: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_JN_3A: // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + switch (ins) + { + case INS_sve_st1b: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + case INS_sve_st1h: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_JN_3B: // ..........x.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + switch (ins) + { + case INS_sve_st1w: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + default: // all other instructions perfScoreUnhandledInstruction(id, &result); diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 4e32c247fa170..23cdf493fc7f7 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -956,6 +956,11 @@ inline static bool insOptsLSExtend(insOpts opt) (opt == INS_OPTS_UXTX) || (opt == INS_OPTS_SXTX)); } +inline static bool insOpts32BitExtend(insOpts opt) +{ + return ((opt == INS_OPTS_UXTW) || (opt == INS_OPTS_SXTW)); +} + inline static bool insOpts64BitExtend(insOpts opt) { return ((opt == INS_OPTS_UXTX) || (opt == INS_OPTS_SXTX)); @@ -1049,6 +1054,18 @@ inline static bool insScalableOptsWithVectorLength(insScalableOpts sopt) return ((sopt == INS_SCALABLE_OPTS_VL_2X) || (sopt == INS_SCALABLE_OPTS_VL_4X)); } +inline static bool insScalableOptsMod(insScalableOpts sopt) +{ + // `sopt` denotes the instruction should be encoded with ''. + return sopt == INS_SCALABLE_OPTS_MOD; +} + +inline static bool insScalableOptsModN(insScalableOpts sopt) +{ + // `sopt` denotes the instruction should be encoded with ' #N'. + return sopt == INS_SCALABLE_OPTS_MOD_N; +} + static bool isValidImmCond(ssize_t imm); static bool isValidImmCondFlags(ssize_t imm); static bool isValidImmCondFlagsImm5(ssize_t imm); diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index a906ce2a5e440..d81740d29283f 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -316,6 +316,9 @@ enum insScalableOpts : unsigned INS_SCALABLE_OPTS_WITH_PREDICATE_PAIR, // Variants with {., .} predicate pair (eg whilege) INS_SCALABLE_OPTS_VL_2X, // Variants with a vector length specifier of 2x (eg whilege) INS_SCALABLE_OPTS_VL_4X, // Variants with a vector length specifier of 4x (eg whilege) + + INS_SCALABLE_OPTS_MOD, + INS_SCALABLE_OPTS_MOD_N, }; enum insCond : unsigned From e4f2708b61d364e688e7635fe8ceb5059104d550 Mon Sep 17 00:00:00 2001 From: TIHan Date: Wed, 17 Jan 2024 17:38:50 -0800 Subject: [PATCH 04/17] Added SVE_JJ_4A_B format --- src/coreclr/jit/codegenarm64test.cpp | 20 ++++++++----- src/coreclr/jit/codegencommon.cpp | 6 ++-- src/coreclr/jit/emitarm64.cpp | 42 ++++++++++++++++++++++++++-- 3 files changed, 55 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 59189af6e8ee0..6f75f80b27e3b 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5848,13 +5848,19 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V0, REG_P3, REG_R1, REG_V2, INS_OPTS_SXTW, INS_SCALABLE_OPTS_MOD_N); // ST1W {.S }, , [, .S, #2] - //// IF_SVE_JJ_4A_B - //theEmitter->emitIns_R_R_R_R(INS_sve_st1d, EA_SCALABLE, REG_V3, REG_P1, REG_R2, REG_V5, - // INS_OPTS_SCALABLE_B); // ST1D {.D }, , [, .D, ] - //theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V2, REG_P3, REG_R1, REG_V4, - // INS_OPTS_SCALABLE_B); // ST1H {.D }, , [, .D, #1] - //theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V1, REG_P4, REG_R2, REG_V3, - // INS_OPTS_SCALABLE_B); // ST1W {.D }, , [, .D, #2] + // IF_SVE_JJ_4A_B + theEmitter->emitIns_R_R_R_R(INS_sve_st1d, EA_SCALABLE, REG_V3, REG_P1, REG_R2, REG_V5, INS_OPTS_UXTW, + INS_SCALABLE_OPTS_MOD); // ST1D {.D }, , [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_st1d, EA_SCALABLE, REG_V3, REG_P1, REG_R2, REG_V5, INS_OPTS_SXTW, + INS_SCALABLE_OPTS_MOD); // ST1D {.D }, , [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V2, REG_P3, REG_R1, REG_V4, INS_OPTS_UXTW, + INS_SCALABLE_OPTS_MOD_N); // ST1H {.D }, , [, .D, #1] + theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V2, REG_P3, REG_R1, REG_V4, INS_OPTS_SXTW, + INS_SCALABLE_OPTS_MOD_N); // ST1H {.D }, , [, .D, #1] + theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V1, REG_P4, REG_R2, REG_V3, INS_OPTS_UXTW, + INS_SCALABLE_OPTS_MOD_N); // ST1W {.D }, , [, .D, #2] + theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V1, REG_P4, REG_R2, REG_V3, INS_OPTS_SXTW, + INS_SCALABLE_OPTS_MOD_N); // ST1W {.D }, , [, .D, #2] //// IF_SVE_JJ_4A_C //theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V1, REG_P5, REG_R1, REG_V3, diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index f0471d242d812..c6e4f1333e6f3 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -2111,12 +2111,12 @@ void CodeGen::genEmitUnwindDebugGCandEH() #endif // defined(LATE_DISASM) || defined(DEBUG) #ifdef LATE_DISASM - getDisAssembler().disAsmCode((BYTE*)*codePtr, (BYTE*)codePtrRW, finalHotCodeSize, (BYTE*)coldCodePtr, - (BYTE*)coldCodePtrRW, finalColdCodeSize); + //getDisAssembler().disAsmCode((BYTE*)*codePtr, (BYTE*)codePtrRW, finalHotCodeSize, (BYTE*)coldCodePtr, + // (BYTE*)coldCodePtrRW, finalColdCodeSize); #endif // LATE_DISASM #ifdef DEBUG - if (JitConfig.JitRawHexCode().contains(compiler->info.compMethodHnd, compiler->info.compClassHnd, + if (compiler->opts.altJit && JitConfig.JitRawHexCode().contains(compiler->info.compMethodHnd, compiler->info.compClassHnd, &compiler->info.compMethodInfo->args)) { // NOTE: code in cold region is not supported. diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 47ed623faddc4..8ddb3e640178f 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -17009,6 +17009,16 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) case IF_SVE_JJ_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled // offsets) + case IF_SVE_JJ_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // // offsets) + case IF_SVE_JJ_4A_C: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + case IF_SVE_JJ_4A_D: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + case IF_SVE_JK_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled + // offsets) + case IF_SVE_JK_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit + // unscaled offsets) code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_V_4_to_0(id->idReg1()); // ttttt code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg @@ -17031,8 +17041,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutput_Instr(dst, code); break; - //case IF_SVE_JJ_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled - // // offsets) //case IF_SVE_JJ_4A_C: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled // // offsets) //case IF_SVE_JJ_4A_D: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled @@ -19801,7 +19809,35 @@ void emitter::emitDispInsHelp( } break; } - //case IF_SVE_JJ_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + case IF_SVE_JJ_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + { + emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), INS_OPTS_SCALABLE_D, + true); // ttttt + emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, INS_OPTS_SCALABLE_D, true); // ggg + printf("["); + emitDispReg(id->idReg3(), EA_8BYTE, true); // nnnnn + emitDispSveReg(id->idReg4(), INS_OPTS_SCALABLE_D, true); // mmmmm + emitDispExtendOpts(id->idInsOpt()); + switch (ins) + { + case INS_sve_st1h: + printf(" #1]"); + break; + + case INS_sve_st1w: + printf(" #2]"); + break; + + case INS_sve_st1d: + printf("]"); + break; + + default: + assert(!"Invalid instruction"); + break; + } + break; + } // // offsets) //case IF_SVE_JJ_4A_C: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled // // offsets) From 258ed98a3667ed17baa92d881753a6bf9a833d2c Mon Sep 17 00:00:00 2001 From: TIHan Date: Wed, 17 Jan 2024 17:39:27 -0800 Subject: [PATCH 05/17] Revert minor change --- src/coreclr/jit/codegencommon.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index c6e4f1333e6f3..f0471d242d812 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -2111,12 +2111,12 @@ void CodeGen::genEmitUnwindDebugGCandEH() #endif // defined(LATE_DISASM) || defined(DEBUG) #ifdef LATE_DISASM - //getDisAssembler().disAsmCode((BYTE*)*codePtr, (BYTE*)codePtrRW, finalHotCodeSize, (BYTE*)coldCodePtr, - // (BYTE*)coldCodePtrRW, finalColdCodeSize); + getDisAssembler().disAsmCode((BYTE*)*codePtr, (BYTE*)codePtrRW, finalHotCodeSize, (BYTE*)coldCodePtr, + (BYTE*)coldCodePtrRW, finalColdCodeSize); #endif // LATE_DISASM #ifdef DEBUG - if (compiler->opts.altJit && JitConfig.JitRawHexCode().contains(compiler->info.compMethodHnd, compiler->info.compClassHnd, + if (JitConfig.JitRawHexCode().contains(compiler->info.compMethodHnd, compiler->info.compClassHnd, &compiler->info.compMethodInfo->args)) { // NOTE: code in cold region is not supported. From 32828bad15945766463764a8ffe1814a5d77a054 Mon Sep 17 00:00:00 2001 From: TIHan Date: Wed, 17 Jan 2024 17:51:46 -0800 Subject: [PATCH 06/17] Minor cleanup --- src/coreclr/jit/codegenarm64test.cpp | 14 +++++++++----- src/coreclr/jit/emitarm64.cpp | 21 +++++++++++++++------ 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 6f75f80b27e3b..764567f9dfa3a 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5862,11 +5862,15 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V1, REG_P4, REG_R2, REG_V3, INS_OPTS_SXTW, INS_SCALABLE_OPTS_MOD_N); // ST1W {.D }, , [, .D, #2] - //// IF_SVE_JJ_4A_C - //theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V1, REG_P5, REG_R1, REG_V3, - // INS_OPTS_SCALABLE_B); // ST1H {.D }, , [, .D, ] - //theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V0, REG_P2, REG_R3, REG_V4, - // INS_OPTS_SCALABLE_B); // ST1W {.D }, , [, .D, ] + // IF_SVE_JJ_4A_C + //theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V1, REG_P5, REG_R1, REG_V3, INS_OPTS_UXTW, + // INS_SCALABLE_OPTS_MOD); // ST1H {.D }, , [, .D, ] + //theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V1, REG_P5, REG_R1, REG_V3, INS_OPTS_SXTW, + // INS_SCALABLE_OPTS_MOD); // ST1H {.D }, , [, .D, ] + //theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V0, REG_P2, REG_R3, REG_V4, INS_OPTS_UXTW, + // INS_SCALABLE_OPTS_MOD); // ST1W {.D }, , [, .D, ] + //theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V0, REG_P2, REG_R3, REG_V4, INS_OPTS_SXTW, + // INS_SCALABLE_OPTS_MOD); // ST1W {.D }, , [, .D, ] //// IF_SVE_JJ_4A_D //theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V7, REG_P5, REG_R4, REG_V1, diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 8ddb3e640178f..e31a79fc8e145 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1492,12 +1492,21 @@ void emitter::emitInsSanityCheck(instrDesc* id) // offsets) case IF_SVE_JK_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit // unscaled offsets) - //assert(insOptsScalable(id->idInsOpt())); - //assert(isVectorRegister(id->idReg10())); // mmmmm - //assert(isPredicateRegister(id->idReg20())); // ggg - //assert(isVectorRegister(id->idReg30())); // ttttt - //assert(isValidGeneralRegister(id->idReg40())); // nnnnn - //assert(isValidImmShift(id->idInsOpt())); // h + elemsize = id->idOpSize(); + assert(isVectorRegister(id->idReg1())); + assert(isPredicateRegister(id->idReg2())); + assert(isGeneralRegister(id->idReg3())); + assert(isScalableVectorSize(elemsize)); +#ifdef DEBUG + if (insOptsScalableStandard(id->idInsOpt())) + { + assert(isGeneralRegister(id->idReg4())); + } + else + { + assert(isVectorRegister(id->idReg4())); + } +#endif // DEBUG break; //case IF_SVE_JN_3A: // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) From 251404c6262938fb97692bf0a74d2d61beee2718 Mon Sep 17 00:00:00 2001 From: TIHan Date: Wed, 17 Jan 2024 19:24:34 -0800 Subject: [PATCH 07/17] Cleanup. Fixed a few formats. Introduced new INS_OPTS_SCALABLE options for UXTW and SXTW --- src/coreclr/jit/codegenarm64test.cpp | 60 ++++---- src/coreclr/jit/emitarm64.cpp | 218 ++++++++++++++------------- src/coreclr/jit/emitarm64.h | 19 +-- src/coreclr/jit/instr.h | 7 +- 4 files changed, 151 insertions(+), 153 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 764567f9dfa3a..829c4eb765f74 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5821,56 +5821,52 @@ void CodeGen::genArm64EmitterUnitTestsSve() INS_OPTS_SCALABLE_S); // ST1B {.}, , [, ] theEmitter->emitIns_R_R_R_R(INS_sve_st1b, EA_SCALABLE, REG_V4, REG_P0, REG_R1, REG_R2, INS_OPTS_SCALABLE_D); // ST1B {.}, , [, ] - theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V5, REG_P6, REG_R1, REG_R2, - INS_OPTS_SCALABLE_H); // ST1H {.}, , [, , LSL #1] - theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V1, REG_P2, REG_R3, REG_R4, - INS_OPTS_SCALABLE_S); // ST1H {.}, , [, , LSL #1] - theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V3, REG_P2, REG_R4, REG_R0, - INS_OPTS_SCALABLE_D); // ST1H {.}, , [, , LSL #1] + theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V5, REG_P6, REG_R1, REG_R2, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_LSL_N); // ST1H {.}, , [, , LSL #1] + theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V1, REG_P2, REG_R3, REG_R4, INS_OPTS_SCALABLE_S, + INS_SCALABLE_OPTS_LSL_N); // ST1H {.}, , [, , LSL #1] + theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V3, REG_P2, REG_R4, REG_R0, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // ST1H {.}, , [, , LSL #1] // IF_SVE_JD_4B - theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V0, REG_P1, REG_R2, REG_R3, - INS_OPTS_SCALABLE_S); // ST1W {.}, , [, , LSL #2] - theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V2, REG_P3, REG_R4, REG_R5, - INS_OPTS_SCALABLE_D); // ST1W {.}, , [, , LSL #2] + theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V0, REG_P1, REG_R2, REG_R3, INS_OPTS_SCALABLE_S, + INS_SCALABLE_OPTS_LSL_N); // ST1W {.}, , [, , LSL #2] + theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V2, REG_P3, REG_R4, REG_R5, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // ST1W {.}, , [, , LSL #2] // IF_SVE_JJ_4A - theEmitter->emitIns_R_R_R_R(INS_sve_st1d, EA_SCALABLE, REG_V0, REG_P1, REG_R2, REG_V3, INS_OPTS_UXTW, + theEmitter->emitIns_R_R_R_R(INS_sve_st1d, EA_SCALABLE, REG_V0, REG_P1, REG_R2, REG_V3, INS_OPTS_SCALABLE_D_UXTW, INS_SCALABLE_OPTS_MOD_N); // ST1D {.D }, , [, .D, #3] - theEmitter->emitIns_R_R_R_R(INS_sve_st1d, EA_SCALABLE, REG_V0, REG_P1, REG_R2, REG_V3, INS_OPTS_SXTW, + theEmitter->emitIns_R_R_R_R(INS_sve_st1d, EA_SCALABLE, REG_V0, REG_P1, REG_R2, REG_V3, INS_OPTS_SCALABLE_D_SXTW, INS_SCALABLE_OPTS_MOD_N); // ST1D {.D }, , [, .D, #3] - theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V3, REG_P1, REG_R5, REG_V4, INS_OPTS_UXTW, + theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V3, REG_P1, REG_R5, REG_V4, INS_OPTS_SCALABLE_S_UXTW, INS_SCALABLE_OPTS_MOD_N); // ST1H {.S }, , [, .S, #1] - theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V3, REG_P1, REG_R5, REG_V4, INS_OPTS_SXTW, + theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V3, REG_P1, REG_R5, REG_V4, INS_OPTS_SCALABLE_S_SXTW, INS_SCALABLE_OPTS_MOD_N); // ST1H {.S }, , [, .S, #1] - theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V0, REG_P3, REG_R1, REG_V2, INS_OPTS_UXTW, + theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V0, REG_P3, REG_R1, REG_V2, INS_OPTS_SCALABLE_S_UXTW, INS_SCALABLE_OPTS_MOD_N); // ST1W {.S }, , [, .S, #2] - theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V0, REG_P3, REG_R1, REG_V2, INS_OPTS_SXTW, + theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V0, REG_P3, REG_R1, REG_V2, INS_OPTS_SCALABLE_S_SXTW, INS_SCALABLE_OPTS_MOD_N); // ST1W {.S }, , [, .S, #2] // IF_SVE_JJ_4A_B - theEmitter->emitIns_R_R_R_R(INS_sve_st1d, EA_SCALABLE, REG_V3, REG_P1, REG_R2, REG_V5, INS_OPTS_UXTW, - INS_SCALABLE_OPTS_MOD); // ST1D {.D }, , [, .D, ] - theEmitter->emitIns_R_R_R_R(INS_sve_st1d, EA_SCALABLE, REG_V3, REG_P1, REG_R2, REG_V5, INS_OPTS_SXTW, - INS_SCALABLE_OPTS_MOD); // ST1D {.D }, , [, .D, ] - theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V2, REG_P3, REG_R1, REG_V4, INS_OPTS_UXTW, + theEmitter->emitIns_R_R_R_R(INS_sve_st1d, EA_SCALABLE, REG_V3, REG_P1, REG_R2, REG_V5, + INS_OPTS_SCALABLE_D_UXTW); // ST1D {.D }, , [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_st1d, EA_SCALABLE, REG_V3, REG_P1, REG_R2, REG_V5, + INS_OPTS_SCALABLE_D_SXTW); // ST1D {.D }, , [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V2, REG_P3, REG_R1, REG_V4, INS_OPTS_SCALABLE_D_UXTW, INS_SCALABLE_OPTS_MOD_N); // ST1H {.D }, , [, .D, #1] - theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V2, REG_P3, REG_R1, REG_V4, INS_OPTS_SXTW, + theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V2, REG_P3, REG_R1, REG_V4, INS_OPTS_SCALABLE_D_SXTW, INS_SCALABLE_OPTS_MOD_N); // ST1H {.D }, , [, .D, #1] - theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V1, REG_P4, REG_R2, REG_V3, INS_OPTS_UXTW, + theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V1, REG_P4, REG_R2, REG_V3, INS_OPTS_SCALABLE_D_UXTW, INS_SCALABLE_OPTS_MOD_N); // ST1W {.D }, , [, .D, #2] - theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V1, REG_P4, REG_R2, REG_V3, INS_OPTS_SXTW, + theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V1, REG_P4, REG_R2, REG_V3, INS_OPTS_SCALABLE_D_SXTW, INS_SCALABLE_OPTS_MOD_N); // ST1W {.D }, , [, .D, #2] // IF_SVE_JJ_4A_C - //theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V1, REG_P5, REG_R1, REG_V3, INS_OPTS_UXTW, - // INS_SCALABLE_OPTS_MOD); // ST1H {.D }, , [, .D, ] - //theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V1, REG_P5, REG_R1, REG_V3, INS_OPTS_SXTW, - // INS_SCALABLE_OPTS_MOD); // ST1H {.D }, , [, .D, ] - //theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V0, REG_P2, REG_R3, REG_V4, INS_OPTS_UXTW, - // INS_SCALABLE_OPTS_MOD); // ST1W {.D }, , [, .D, ] - //theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V0, REG_P2, REG_R3, REG_V4, INS_OPTS_SXTW, - // INS_SCALABLE_OPTS_MOD); // ST1W {.D }, , [, .D, ] + //theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V1, REG_P5, REG_R1, REG_V3, INS_OPTS_SCALABLE_D_UXTW); // ST1H {.D }, , [, .D, ] + //theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V1, REG_P5, REG_R1, REG_V3, INS_OPTS_SCALABLE_D_SXTW); // ST1H {.D }, , [, .D, ] + //theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V0, REG_P2, REG_R3, REG_V4, INS_OPTS_SCALABLE_D_UXTW); // ST1W {.D }, , [, .D, ] + //theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V0, REG_P2, REG_R3, REG_V4, INS_OPTS_SCALABLE_D_SXTW); // ST1W {.D }, , [, .D, ] //// IF_SVE_JJ_4A_D //theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V7, REG_P5, REG_R4, REG_V1, diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index e31a79fc8e145..737f8b608554c 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1493,20 +1493,11 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SVE_JK_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit // unscaled offsets) elemsize = id->idOpSize(); - assert(isVectorRegister(id->idReg1())); - assert(isPredicateRegister(id->idReg2())); - assert(isGeneralRegister(id->idReg3())); + assert(insOptsScalableStandard(id->idInsOpt()) || insOptsScalable32bitExtends(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn assert(isScalableVectorSize(elemsize)); -#ifdef DEBUG - if (insOptsScalableStandard(id->idInsOpt())) - { - assert(isGeneralRegister(id->idReg4())); - } - else - { - assert(isVectorRegister(id->idReg4())); - } -#endif // DEBUG break; //case IF_SVE_JN_3A: // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) @@ -5616,9 +5607,13 @@ emitter::code_t emitter::emitInsCodeSve(instruction ins, insFormat fmt) return EA_2BYTE; case INS_OPTS_SCALABLE_S: + case INS_OPTS_SCALABLE_S_UXTW: + case INS_OPTS_SCALABLE_S_SXTW: return EA_4BYTE; case INS_OPTS_SCALABLE_D: + case INS_OPTS_SCALABLE_D_UXTW: + case INS_OPTS_SCALABLE_D_SXTW: return EA_8BYTE; case INS_OPTS_SCALABLE_Q: @@ -11009,13 +11004,21 @@ void emitter::emitIns_R_R_R_R(instruction ins, break; case INS_sve_st1b: + assert(insOptsScalableStandard(opt)); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isScalableVectorSize(size)); + fmt = IF_SVE_JD_4A; + break; + case INS_sve_st1h: assert(isVectorRegister(reg1)); assert(isPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); assert(isScalableVectorSize(size)); #ifdef DEBUG - if ((ins == INS_sve_st1h) && (opt == INS_OPTS_SCALABLE_B)) + if (opt == INS_OPTS_SCALABLE_B) { assert(!"sve_st1h with scalable B is reserved"); } @@ -11023,28 +11026,28 @@ void emitter::emitIns_R_R_R_R(instruction ins, if (insOptsScalableStandard(opt)) { assert(isGeneralRegister(reg4)); + assert(sopt == INS_SCALABLE_OPTS_LSL_N); fmt = IF_SVE_JD_4A; } else { - assert(isVectorRegister(reg4)); - switch (ins) + assert(insOptsScalable32bitExtends(opt)); + switch (opt) { - case INS_sve_st1h: - assert(insOpts32BitExtend(opt)); - if (insScalableOptsModN(sopt)) - { - fmt = IF_SVE_JJ_4A; - } - else - { - assert(insScalableOptsMod(sopt)); - fmt = IF_SVE_JJ_4A_B; - } + case INS_OPTS_SCALABLE_S_UXTW: + case INS_OPTS_SCALABLE_S_SXTW: + assert(sopt == INS_SCALABLE_OPTS_MOD_N); + fmt = IF_SVE_JJ_4A; + break; + + case INS_OPTS_SCALABLE_D_UXTW: + case INS_OPTS_SCALABLE_D_SXTW: + assert(sopt == INS_SCALABLE_OPTS_MOD_N); + fmt = IF_SVE_JJ_4A_B; break; default: - assert(!"Invalid instruction"); + assert(!"Invalid options for scalable"); break; } } @@ -11058,48 +11061,57 @@ void emitter::emitIns_R_R_R_R(instruction ins, if (insOptsScalableStandard(opt)) { assert(isGeneralRegister(reg4)); + assert(sopt == INS_SCALABLE_OPTS_LSL_N); fmt = IF_SVE_JD_4B; } else { - assert(isVectorRegister(reg4)); - switch (ins) + assert(insOptsScalable32bitExtends(opt)); + switch (opt) { - case INS_sve_st1w: - assert(insOpts32BitExtend(opt)); - if (insScalableOptsModN(sopt)) - { - fmt = IF_SVE_JJ_4A; - } - else - { - assert(insScalableOptsMod(sopt)); - fmt = IF_SVE_JJ_4A_B; - } + case INS_OPTS_SCALABLE_S_UXTW: + case INS_OPTS_SCALABLE_S_SXTW: + assert(sopt == INS_SCALABLE_OPTS_MOD_N); + fmt = IF_SVE_JJ_4A; + break; + + case INS_OPTS_SCALABLE_D_UXTW: + case INS_OPTS_SCALABLE_D_SXTW: + assert(sopt == INS_SCALABLE_OPTS_MOD_N); + fmt = IF_SVE_JJ_4A_B; break; default: - assert(!"Invalid instruction"); + assert(!"Invalid options for scalable"); break; } } break; case INS_sve_st1d: + assert(insOptsScalable32bitExtends(opt)); assert(isVectorRegister(reg1)); assert(isPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); - assert(isVectorRegister(reg4)); assert(isScalableVectorSize(size)); - assert(insOpts32BitExtend(opt)); - if (insScalableOptsModN(sopt)) - { - fmt = IF_SVE_JJ_4A; - } - else + switch (opt) { - assert(insScalableOptsMod(sopt)); - fmt = IF_SVE_JJ_4A_B; + case INS_OPTS_SCALABLE_D_UXTW: + case INS_OPTS_SCALABLE_D_SXTW: + if (sopt == INS_SCALABLE_OPTS_MOD_N) + { + fmt = IF_SVE_JJ_4A; + } + else + { + assert(insScalableOptsNone(sopt)); + fmt = IF_SVE_JJ_4A_B; + } + break; + + default: + assert(!"Invalid options for scalable"); + break; } break; @@ -17036,15 +17048,12 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) switch (id->idInsOpt()) { - case INS_OPTS_UXTW: - break; - - case INS_OPTS_SXTW: + case INS_OPTS_SCALABLE_S_SXTW: + case INS_OPTS_SCALABLE_D_SXTW: code |= (1 << 14); // h break; default: - assert(!"Invalid OPTS"); break; } @@ -17503,6 +17512,31 @@ void emitter::emitDispExtendOpts(insOpts opt) assert(!"Bad value"); } +/***************************************************************************** + * + * Prints the encoding for the Extend Type encoding + */ + +void emitter::emitDispSveExtendOpts(insOpts opt) +{ + switch (opt) + { + case INS_OPTS_SCALABLE_S_UXTW: + case INS_OPTS_SCALABLE_D_UXTW: + printf("UXTW"); + break; + + case INS_OPTS_SCALABLE_S_SXTW: + case INS_OPTS_SCALABLE_D_SXTW: + printf("SXTW"); + break; + + default: + assert(!"Bad value"); + break; + } +} + /***************************************************************************** * * Prints the encoding for the Extend Type encoding in loads/stores @@ -17541,7 +17575,7 @@ void emitter::emitDispReg(regNumber reg, emitAttr attr, bool addComma) // void emitter::emitDispSveReg(regNumber reg, insOpts opt, bool addComma) { - assert(insOptsScalable(opt)); + assert(insOptsScalable(opt) || insOptsScalable32bitExtends(opt)); assert(isVectorRegister(reg)); printf(emitSveRegName(reg)); emitDispArrangement(opt); @@ -17771,6 +17805,8 @@ void emitter::emitDispArrangement(insOpts opt) str = "4s"; break; case INS_OPTS_SCALABLE_S: + case INS_OPTS_SCALABLE_S_UXTW: + case INS_OPTS_SCALABLE_S_SXTW: str = "s"; break; case INS_OPTS_1D: @@ -17780,6 +17816,8 @@ void emitter::emitDispArrangement(insOpts opt) str = "2d"; break; case INS_OPTS_SCALABLE_D: + case INS_OPTS_SCALABLE_D_UXTW: + case INS_OPTS_SCALABLE_D_SXTW: str = "d"; break; case INS_OPTS_SCALABLE_Q: @@ -19774,59 +19812,15 @@ void emitter::emitDispInsHelp( case IF_SVE_JJ_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled // offsets) - { - insOpts opt = INS_OPTS_NONE; - switch (ins) - { - case INS_sve_st1h: - case INS_sve_st1w: - opt = INS_OPTS_SCALABLE_S; - break; - - case INS_sve_st1d: - opt = INS_OPTS_SCALABLE_D; - break; - - default: - assert(!"Invalid instruction"); - break; - } - emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), opt, - true); // ttttt - emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, opt, true); // ggg - printf("["); - emitDispReg(id->idReg3(), EA_8BYTE, true); // nnnnn - emitDispSveReg(id->idReg4(), opt, true); // mmmmm - emitDispExtendOpts(id->idInsOpt()); - switch (ins) - { - case INS_sve_st1h: - printf(" #1]"); - break; - - case INS_sve_st1w: - printf(" #2]"); - break; - - case INS_sve_st1d: - printf(" #3]"); - break; - - default: - assert(!"Invalid instruction"); - break; - } - break; - } case IF_SVE_JJ_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled { - emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), INS_OPTS_SCALABLE_D, - true); // ttttt - emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, INS_OPTS_SCALABLE_D, true); // ggg + emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), + true); // ttttt + emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, id->idInsOpt(), true); // ggg printf("["); - emitDispReg(id->idReg3(), EA_8BYTE, true); // nnnnn - emitDispSveReg(id->idReg4(), INS_OPTS_SCALABLE_D, true); // mmmmm - emitDispExtendOpts(id->idInsOpt()); + emitDispReg(id->idReg3(), EA_8BYTE, true); // nnnnn + emitDispSveReg(id->idReg4(), id->idInsOpt(), true); // mmmmm + emitDispSveExtendOpts(id->idInsOpt()); switch (ins) { case INS_sve_st1h: @@ -19838,7 +19832,14 @@ void emitter::emitDispInsHelp( break; case INS_sve_st1d: - printf("]"); + if (fmt == IF_SVE_JJ_4A_B) + { + printf("]"); + } + else + { + printf(" #3]"); + } break; default: @@ -19847,6 +19848,7 @@ void emitter::emitDispInsHelp( } break; } + // // offsets) //case IF_SVE_JJ_4A_C: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled // // offsets) diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 23cdf493fc7f7..d1a4aba4b85ed 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -46,6 +46,7 @@ void emitDispFlags(insCflags flags); void emitDispBarrier(insBarrier barrier); void emitDispShiftOpts(insOpts opt); void emitDispExtendOpts(insOpts opt); +void emitDispSveExtendOpts(insOpts opt); void emitDispLSExtendOpts(insOpts opt); void emitDispReg(regNumber reg, emitAttr attr, bool addComma); void emitDispSveReg(regNumber reg, insOpts opt, bool addComma); @@ -1036,6 +1037,12 @@ inline static bool insOptsScalableWide(insOpts opt) return ((opt == INS_OPTS_SCALABLE_B) || (opt == INS_OPTS_SCALABLE_H) || (opt == INS_OPTS_SCALABLE_S)); } +inline static bool insOptsScalable32bitExtends(insOpts opt) +{ + return ((opt == INS_OPTS_SCALABLE_S_UXTW) || (opt == INS_OPTS_SCALABLE_S_SXTW) || + (opt == INS_OPTS_SCALABLE_D_UXTW) || (opt == INS_OPTS_SCALABLE_D_SXTW)); +} + inline static bool insScalableOptsNone(insScalableOpts sopt) { // `sopt` is used for instructions with no extra encoding variants. @@ -1054,18 +1061,6 @@ inline static bool insScalableOptsWithVectorLength(insScalableOpts sopt) return ((sopt == INS_SCALABLE_OPTS_VL_2X) || (sopt == INS_SCALABLE_OPTS_VL_4X)); } -inline static bool insScalableOptsMod(insScalableOpts sopt) -{ - // `sopt` denotes the instruction should be encoded with ''. - return sopt == INS_SCALABLE_OPTS_MOD; -} - -inline static bool insScalableOptsModN(insScalableOpts sopt) -{ - // `sopt` denotes the instruction should be encoded with ' #N'. - return sopt == INS_SCALABLE_OPTS_MOD_N; -} - static bool isValidImmCond(ssize_t imm); static bool isValidImmCondFlags(ssize_t imm); static bool isValidImmCondFlagsImm5(ssize_t imm); diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index d81740d29283f..1c06be8ceed90 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -276,6 +276,11 @@ enum insOpts : unsigned INS_OPTS_SCALABLE_D, INS_OPTS_SCALABLE_Q, + INS_OPTS_SCALABLE_S_UXTW, + INS_OPTS_SCALABLE_S_SXTW, + INS_OPTS_SCALABLE_D_UXTW, + INS_OPTS_SCALABLE_D_SXTW, + INS_OPTS_MSL, // Vector Immediate (shifting ones variant) INS_OPTS_S_TO_4BYTE, // Single to INT32 @@ -317,7 +322,7 @@ enum insScalableOpts : unsigned INS_SCALABLE_OPTS_VL_2X, // Variants with a vector length specifier of 2x (eg whilege) INS_SCALABLE_OPTS_VL_4X, // Variants with a vector length specifier of 4x (eg whilege) - INS_SCALABLE_OPTS_MOD, + INS_SCALABLE_OPTS_LSL_N, INS_SCALABLE_OPTS_MOD_N, }; From 9504bae9e613ba6d82bb810eb8e26e3164b5191c Mon Sep 17 00:00:00 2001 From: TIHan Date: Thu, 18 Jan 2024 14:56:00 -0800 Subject: [PATCH 08/17] Added SVE_JJ_4A_C format --- src/coreclr/jit/codegenarm64test.cpp | 8 ++--- src/coreclr/jit/emitarm64.cpp | 46 ++++++++++++++++++++++------ 2 files changed, 41 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 829c4eb765f74..dd7c97bb29593 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5863,10 +5863,10 @@ void CodeGen::genArm64EmitterUnitTestsSve() INS_SCALABLE_OPTS_MOD_N); // ST1W {.D }, , [, .D, #2] // IF_SVE_JJ_4A_C - //theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V1, REG_P5, REG_R1, REG_V3, INS_OPTS_SCALABLE_D_UXTW); // ST1H {.D }, , [, .D, ] - //theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V1, REG_P5, REG_R1, REG_V3, INS_OPTS_SCALABLE_D_SXTW); // ST1H {.D }, , [, .D, ] - //theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V0, REG_P2, REG_R3, REG_V4, INS_OPTS_SCALABLE_D_UXTW); // ST1W {.D }, , [, .D, ] - //theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V0, REG_P2, REG_R3, REG_V4, INS_OPTS_SCALABLE_D_SXTW); // ST1W {.D }, , [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V1, REG_P5, REG_R1, REG_V3, INS_OPTS_SCALABLE_D_UXTW); // ST1H {.D }, , [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V1, REG_P5, REG_R1, REG_V3, INS_OPTS_SCALABLE_D_SXTW); // ST1H {.D }, , [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V0, REG_P2, REG_R3, REG_V4, INS_OPTS_SCALABLE_D_UXTW); // ST1W {.D }, , [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V0, REG_P2, REG_R3, REG_V4, INS_OPTS_SCALABLE_D_SXTW); // ST1W {.D }, , [, .D, ] //// IF_SVE_JJ_4A_D //theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V7, REG_P5, REG_R4, REG_V1, diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 737f8b608554c..801a9386fac80 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -11042,8 +11042,15 @@ void emitter::emitIns_R_R_R_R(instruction ins, case INS_OPTS_SCALABLE_D_UXTW: case INS_OPTS_SCALABLE_D_SXTW: - assert(sopt == INS_SCALABLE_OPTS_MOD_N); - fmt = IF_SVE_JJ_4A_B; + if (insScalableOptsNone(sopt)) + { + fmt = IF_SVE_JJ_4A_C; + } + else + { + assert(sopt == INS_SCALABLE_OPTS_MOD_N); + fmt = IF_SVE_JJ_4A_B; + } break; default: @@ -11077,8 +11084,15 @@ void emitter::emitIns_R_R_R_R(instruction ins, case INS_OPTS_SCALABLE_D_UXTW: case INS_OPTS_SCALABLE_D_SXTW: - assert(sopt == INS_SCALABLE_OPTS_MOD_N); - fmt = IF_SVE_JJ_4A_B; + if (insScalableOptsNone(sopt)) + { + fmt = IF_SVE_JJ_4A_C; + } + else + { + assert(sopt == INS_SCALABLE_OPTS_MOD_N); + fmt = IF_SVE_JJ_4A_B; + } break; default: @@ -19813,6 +19827,9 @@ void emitter::emitDispInsHelp( case IF_SVE_JJ_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled // offsets) case IF_SVE_JJ_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + case IF_SVE_JJ_4A_C: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) { emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt @@ -19824,11 +19841,25 @@ void emitter::emitDispInsHelp( switch (ins) { case INS_sve_st1h: - printf(" #1]"); + if (fmt == IF_SVE_JJ_4A_C) + { + printf("]"); + } + else + { + printf(" #1]"); + } break; case INS_sve_st1w: - printf(" #2]"); + if (fmt == IF_SVE_JJ_4A_C) + { + printf("]"); + } + else + { + printf(" #2]"); + } break; case INS_sve_st1d: @@ -19849,9 +19880,6 @@ void emitter::emitDispInsHelp( break; } - // // offsets) - //case IF_SVE_JJ_4A_C: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled - // // offsets) //case IF_SVE_JJ_4A_D: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled // // offsets) //case IF_SVE_JK_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled From 2092b95324c97bd3e344cc09ea944f07fdd5b523 Mon Sep 17 00:00:00 2001 From: TIHan Date: Thu, 18 Jan 2024 15:06:38 -0800 Subject: [PATCH 09/17] Added SVE_JJ_4A_D format --- src/coreclr/jit/codegenarm64test.cpp | 14 ++++++++----- src/coreclr/jit/emitarm64.cpp | 30 ++++++++++++++++++++-------- 2 files changed, 31 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index dd7c97bb29593..de14efb11fb97 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5868,11 +5868,15 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V0, REG_P2, REG_R3, REG_V4, INS_OPTS_SCALABLE_D_UXTW); // ST1W {.D }, , [, .D, ] theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V0, REG_P2, REG_R3, REG_V4, INS_OPTS_SCALABLE_D_SXTW); // ST1W {.D }, , [, .D, ] - //// IF_SVE_JJ_4A_D - //theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V7, REG_P5, REG_R4, REG_V1, - // INS_OPTS_SCALABLE_B); // ST1H {.S }, , [, .S, ] - //theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V1, REG_P2, REG_R3, REG_V2, - // INS_OPTS_SCALABLE_B); // ST1W {.S }, , [, .S, ] + // IF_SVE_JJ_4A_D + theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V7, REG_P5, REG_R4, REG_V1, + INS_OPTS_SCALABLE_S_UXTW); // ST1H {.S }, , [, .S, ] + theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V7, REG_P5, REG_R4, REG_V1, + INS_OPTS_SCALABLE_S_SXTW); // ST1H {.S }, , [, .S, ] + theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V1, REG_P2, REG_R3, REG_V2, + INS_OPTS_SCALABLE_S_UXTW); // ST1W {.S }, , [, .S, ] + theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V1, REG_P2, REG_R3, REG_V2, + INS_OPTS_SCALABLE_S_SXTW); // ST1W {.S }, , [, .S, ] //// IF_SVE_JK_4A //theEmitter->emitIns_R_R_R_R(INS_sve_st1b, EA_SCALABLE, REG_V4, REG_P2, REG_R0, REG_V1, diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 801a9386fac80..10a7130339c99 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -11036,8 +11036,15 @@ void emitter::emitIns_R_R_R_R(instruction ins, { case INS_OPTS_SCALABLE_S_UXTW: case INS_OPTS_SCALABLE_S_SXTW: - assert(sopt == INS_SCALABLE_OPTS_MOD_N); - fmt = IF_SVE_JJ_4A; + if (insScalableOptsNone(sopt)) + { + fmt = IF_SVE_JJ_4A_D; + } + else + { + assert(sopt == INS_SCALABLE_OPTS_MOD_N); + fmt = IF_SVE_JJ_4A; + } break; case INS_OPTS_SCALABLE_D_UXTW: @@ -11078,8 +11085,15 @@ void emitter::emitIns_R_R_R_R(instruction ins, { case INS_OPTS_SCALABLE_S_UXTW: case INS_OPTS_SCALABLE_S_SXTW: - assert(sopt == INS_SCALABLE_OPTS_MOD_N); - fmt = IF_SVE_JJ_4A; + if (insScalableOptsNone(sopt)) + { + fmt = IF_SVE_JJ_4A_D; + } + else + { + assert(sopt == INS_SCALABLE_OPTS_MOD_N); + fmt = IF_SVE_JJ_4A; + } break; case INS_OPTS_SCALABLE_D_UXTW: @@ -19830,6 +19844,8 @@ void emitter::emitDispInsHelp( // offsets) case IF_SVE_JJ_4A_C: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled // offsets) + case IF_SVE_JJ_4A_D: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) { emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt @@ -19841,7 +19857,7 @@ void emitter::emitDispInsHelp( switch (ins) { case INS_sve_st1h: - if (fmt == IF_SVE_JJ_4A_C) + if ((fmt == IF_SVE_JJ_4A_C) || (fmt == IF_SVE_JJ_4A_D)) { printf("]"); } @@ -19852,7 +19868,7 @@ void emitter::emitDispInsHelp( break; case INS_sve_st1w: - if (fmt == IF_SVE_JJ_4A_C) + if ((fmt == IF_SVE_JJ_4A_C) || (fmt == IF_SVE_JJ_4A_D)) { printf("]"); } @@ -19880,8 +19896,6 @@ void emitter::emitDispInsHelp( break; } - //case IF_SVE_JJ_4A_D: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled - // // offsets) //case IF_SVE_JK_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled // // offsets) //case IF_SVE_JK_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit From 36b9edb24b9a4dd2d1e3a7ca10a6cc2f09bda851 Mon Sep 17 00:00:00 2001 From: TIHan Date: Thu, 18 Jan 2024 15:28:54 -0800 Subject: [PATCH 10/17] Added SVE_JK_4A and SVE_JK_4A_B formats --- src/coreclr/jit/codegenarm64test.cpp | 18 +++-- src/coreclr/jit/emitarm64.cpp | 117 ++++++++------------------- src/coreclr/jit/emitarm64.h | 5 -- 3 files changed, 45 insertions(+), 95 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index de14efb11fb97..ab89d9cdf8a3c 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5878,13 +5878,17 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V1, REG_P2, REG_R3, REG_V2, INS_OPTS_SCALABLE_S_SXTW); // ST1W {.S }, , [, .S, ] - //// IF_SVE_JK_4A - //theEmitter->emitIns_R_R_R_R(INS_sve_st1b, EA_SCALABLE, REG_V4, REG_P2, REG_R0, REG_V1, - // INS_OPTS_SCALABLE_B); // ST1B {.D }, , [, .D, ] - - //// IF_SVE_JK_4A_B - //theEmitter->emitIns_R_R_R_R(INS_sve_st1b, EA_SCALABLE, REG_V1, REG_P4, REG_R3, REG_V0, - // INS_OPTS_SCALABLE_B); // ST1B {.S }, , [, .S, ] + // IF_SVE_JK_4A + theEmitter->emitIns_R_R_R_R(INS_sve_st1b, EA_SCALABLE, REG_V4, REG_P2, REG_R0, REG_V1, + INS_OPTS_SCALABLE_D_UXTW); // ST1B {.D }, , [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_st1b, EA_SCALABLE, REG_V4, REG_P2, REG_R0, REG_V1, + INS_OPTS_SCALABLE_D_SXTW); // ST1B {.D }, , [, .D, ] + + // IF_SVE_JK_4A_B + theEmitter->emitIns_R_R_R_R(INS_sve_st1b, EA_SCALABLE, REG_V1, REG_P4, REG_R3, REG_V0, + INS_OPTS_SCALABLE_S_UXTW); // ST1B {.S }, , [, .S, ] + theEmitter->emitIns_R_R_R_R(INS_sve_st1b, EA_SCALABLE, REG_V1, REG_P4, REG_R3, REG_V0, + INS_OPTS_SCALABLE_S_SXTW); // ST1B {.S }, , [, .S, ] //// IF_SVE_JN_3A //theEmitter->emitIns_R_R_R_I(INS_sve_st1b, EA_SCALABLE, REG_V3, REG_P2, REG_R1, 5, diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 10a7130339c99..3bfef8a37d60f 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -11004,12 +11004,36 @@ void emitter::emitIns_R_R_R_R(instruction ins, break; case INS_sve_st1b: - assert(insOptsScalableStandard(opt)); assert(isVectorRegister(reg1)); assert(isPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); assert(isScalableVectorSize(size)); - fmt = IF_SVE_JD_4A; + assert(insScalableOptsNone(sopt)); + if (insOptsScalableStandard(opt)) + { + assert(isGeneralRegister(reg4)); + fmt = IF_SVE_JD_4A; + } + else + { + assert(insOptsScalable32bitExtends(opt)); + switch (opt) + { + case INS_OPTS_SCALABLE_S_UXTW: + case INS_OPTS_SCALABLE_S_SXTW: + fmt = IF_SVE_JK_4A_B; + break; + + case INS_OPTS_SCALABLE_D_UXTW: + case INS_OPTS_SCALABLE_D_SXTW: + fmt = IF_SVE_JK_4A; + break; + + default: + assert(!"Invalid options for scalable"); + break; + } + } break; case INS_sve_st1h: @@ -11143,72 +11167,6 @@ void emitter::emitIns_R_R_R_R(instruction ins, } break; - //case INS_sve_st1h: - //case INS_sve_st1w: - //case INS_sve_st1d: - // assert(insOptsScalable(id->idInsOpt())); - // assert(isVectorRegister(reg10)); // mmmmm - // assert(isPredicateRegister(reg20)); // ggg - // assert(isVectorRegister(reg30)); // ttttt - // assert(isValidGeneralRegister(reg40)); // nnnnn - // assert(isValidImmShift(opt)); // h - // fmt = IF_SVE_JJ_4A; - // break; - - //case INS_sve_st1h: - //case INS_sve_st1w: - //case INS_sve_st1d: - // assert(insOptsScalable(id->idInsOpt())); - // assert(isVectorRegister(reg10)); // mmmmm - // assert(isPredicateRegister(reg20)); // ggg - // assert(isVectorRegister(reg30)); // ttttt - // assert(isValidGeneralRegister(reg40)); // nnnnn - // assert(isValidImmShift(opt)); // h - // fmt = IF_SVE_JJ_4A_B; - // break; - - //case INS_sve_st1h: - //case INS_sve_st1w: - // assert(insOptsScalable(id->idInsOpt())); - // assert(isVectorRegister(reg10)); // mmmmm - // assert(isPredicateRegister(reg20)); // ggg - // assert(isVectorRegister(reg30)); // ttttt - // assert(isValidGeneralRegister(reg40)); // nnnnn - // assert(isValidImmShift(opt)); // h - // fmt = IF_SVE_JJ_4A_C; - // break; - - //case INS_sve_st1h: - //case INS_sve_st1w: - // assert(insOptsScalable(id->idInsOpt())); - // assert(isVectorRegister(reg10)); // mmmmm - // assert(isPredicateRegister(reg20)); // ggg - // assert(isVectorRegister(reg30)); // ttttt - // assert(isValidGeneralRegister(reg40)); // nnnnn - // assert(isValidImmShift(opt)); // h - // fmt = IF_SVE_JJ_4A_D; - // break; - - //case INS_sve_st1b: - // assert(insOptsScalable(id->idInsOpt())); - // assert(isVectorRegister(reg10)); // mmmmm - // assert(isPredicateRegister(reg20)); // ggg - // assert(isVectorRegister(reg30)); // ttttt - // assert(isValidGeneralRegister(reg40)); // nnnnn - // assert(isValidImmShift(opt)); // h - // fmt = IF_SVE_JK_4A; - // break; - - //case INS_sve_st1b: - // assert(insOptsScalable(id->idInsOpt())); - // assert(isVectorRegister(reg10)); // mmmmm - // assert(isPredicateRegister(reg20)); // ggg - // assert(isVectorRegister(reg30)); // ttttt - // assert(isValidGeneralRegister(reg40)); // nnnnn - // assert(isValidImmShift(opt)); // h - // fmt = IF_SVE_JK_4A_B; - // break; - default: unreached(); break; @@ -17087,15 +17045,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutput_Instr(dst, code); break; - //case IF_SVE_JJ_4A_C: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled - // // offsets) - //case IF_SVE_JJ_4A_D: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled - // // offsets) - //case IF_SVE_JK_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled - // // offsets) - //case IF_SVE_JK_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit - // // unscaled offsets) - // break; //case IF_SVE_JN_3A: // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) // code = emitInsCodeSve(ins, fmt); @@ -19846,6 +19795,10 @@ void emitter::emitDispInsHelp( // offsets) case IF_SVE_JJ_4A_D: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled // offsets) + case IF_SVE_JK_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled + // offsets) + case IF_SVE_JK_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit + // unscaled offsets) { emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt @@ -19856,6 +19809,10 @@ void emitter::emitDispInsHelp( emitDispSveExtendOpts(id->idInsOpt()); switch (ins) { + case INS_sve_st1b: + printf("]"); + break; + case INS_sve_st1h: if ((fmt == IF_SVE_JJ_4A_C) || (fmt == IF_SVE_JJ_4A_D)) { @@ -19896,12 +19853,6 @@ void emitter::emitDispInsHelp( break; } - //case IF_SVE_JK_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled - // // offsets) - //case IF_SVE_JK_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit - // // unscaled offsets) - // break; - //case IF_SVE_JN_3A: // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) // emitDispPredicateReg(id->idReg10(), PREDICATE_ZERO, id->idInsOpt(), true); // ggg // emitDispSveReg(id->idReg20(), id->idInsOpt(), true); // ttttt diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index d1a4aba4b85ed..7a660c3a05b44 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -957,11 +957,6 @@ inline static bool insOptsLSExtend(insOpts opt) (opt == INS_OPTS_UXTX) || (opt == INS_OPTS_SXTX)); } -inline static bool insOpts32BitExtend(insOpts opt) -{ - return ((opt == INS_OPTS_UXTW) || (opt == INS_OPTS_SXTW)); -} - inline static bool insOpts64BitExtend(insOpts opt) { return ((opt == INS_OPTS_UXTX) || (opt == INS_OPTS_SXTX)); From 5463ca3559ff8cb7f27f401f19dc1d73f436a5d0 Mon Sep 17 00:00:00 2001 From: TIHan Date: Thu, 18 Jan 2024 15:54:28 -0800 Subject: [PATCH 11/17] Added SVE_JN_3A format --- src/coreclr/jit/codegenarm64test.cpp | 20 +++++-- src/coreclr/jit/emitarm64.cpp | 82 ++++++++++++++++------------ 2 files changed, 63 insertions(+), 39 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index ab89d9cdf8a3c..b0a56a51ef5b1 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5890,11 +5890,21 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R_R(INS_sve_st1b, EA_SCALABLE, REG_V1, REG_P4, REG_R3, REG_V0, INS_OPTS_SCALABLE_S_SXTW); // ST1B {.S }, , [, .S, ] - //// IF_SVE_JN_3A - //theEmitter->emitIns_R_R_R_I(INS_sve_st1b, EA_SCALABLE, REG_V3, REG_P2, REG_R1, 5, - // INS_OPTS_SCALABLE_B); // ST1B {.}, , [{, #, MUL VL}] - //theEmitter->emitIns_R_R_R_I(INS_sve_st1h, EA_SCALABLE, REG_V0, REG_P3, REG_R4, 5, - // INS_OPTS_SCALABLE_B); // ST1H {.}, , [{, #, MUL VL}] + // IF_SVE_JN_3A + theEmitter->emitIns_R_R_R_I(INS_sve_st1b, EA_SCALABLE, REG_V3, REG_P2, REG_R1, 5, + INS_OPTS_SCALABLE_B); // ST1B {.}, , [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_st1b, EA_SCALABLE, REG_V3, REG_P2, REG_R1, 4, + INS_OPTS_SCALABLE_H); // ST1B {.}, , [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_st1h, EA_SCALABLE, REG_V0, REG_P3, REG_R4, 3, + INS_OPTS_SCALABLE_H); // ST1H {.}, , [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_st1b, EA_SCALABLE, REG_V3, REG_P2, REG_R1, 2, + INS_OPTS_SCALABLE_S); // ST1B {.}, , [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_st1h, EA_SCALABLE, REG_V0, REG_P3, REG_R4, 1, + INS_OPTS_SCALABLE_S); // ST1H {.}, , [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_st1b, EA_SCALABLE, REG_V3, REG_P2, REG_R1, 0, + INS_OPTS_SCALABLE_D); // ST1B {.}, , [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_st1h, EA_SCALABLE, REG_V0, REG_P3, REG_R4, -2, + INS_OPTS_SCALABLE_D); // ST1H {.}, , [{, #, MUL VL}] //// IF_SVE_JN_3B //theEmitter->emitIns_R_R_R_I(INS_sve_st1w, EA_SCALABLE, REG_V2, REG_P1, REG_R3, 5, diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 3bfef8a37d60f..dc455fa28c84f 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1500,15 +1500,16 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isScalableVectorSize(elemsize)); break; - //case IF_SVE_JN_3A: // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) - // elemsize = id->idOpSize(); - // assert(insOptsScalable(id->idInsOpt())); - // assert(isPredicateRegister(id->idReg10())); // ggg - // assert(isVectorRegister(id->idReg20())); // ttttt - // assert(isValidGeneralRegister(id->idReg30())); // nnnnn - // assert(isValidImm()); // iiii - // assert(isValidVectorElemsize(id->idInsOpt())); // xx - // break; + case IF_SVE_JN_3A: // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + imm = emitGetInsSC(id); + elemsize = id->idOpSize(); + assert(insOptsScalable(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isScalableVectorSize(elemsize)); // xx + assert(isValidSimm4(imm)); // iiii + break; //case IF_SVE_JN_3B: // ..........x.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) // elemsize = id->idOpSize(); @@ -10453,16 +10454,21 @@ void emitter::emitIns_R_R_R_I(instruction ins, fmt = IF_SVE_JO_3A; break; - //case INS_sve_st1b: - //case INS_sve_st1h: - // assert(insOptsScalable(id->idInsOpt())); - // assert(isPredicateRegister(reg10)); // ggg - // assert(isVectorRegister(reg20)); // ttttt - // assert(isValidGeneralRegister(reg30)); // nnnnn - // assert(isValidImm()); // iiii - // assert(isValidVectorElemsize(opt)); // xx - // fmt = IF_SVE_JN_3A; - // break; + case INS_sve_st1b: + case INS_sve_st1h: + assert(insOptsScalableStandard(opt)); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidSimm4(imm)); +#ifdef DEBUG + if ((ins == INS_sve_st1h) && (opt == INS_OPTS_SCALABLE_B)) + { + assert(!"sve_st1h with scalable B is reserved"); + } +#endif // DEBUG + fmt = IF_SVE_JN_3A; + break; //case INS_sve_st1w: // assert(insOptsScalable(id->idInsOpt())); @@ -17046,15 +17052,16 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutput_Instr(dst, code); break; - //case IF_SVE_JN_3A: // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) - // code = emitInsCodeSve(ins, fmt); - // code |= insEncodeReg_P_12_to_10(id->idReg10()); // ggg - // code |= insEncodeReg_V_4_to_0(id->idReg20()); // ttttt - // code |= insEncodeReg_R_9_to_5(id->idReg30()); // nnnnn - // code |= insEncodeImm(); // iiii - // code |= insEncodeElemsize(id->idInsOpt()); // xx - // dst += emitOutput_Instr(dst, code); - // break; + case IF_SVE_JN_3A: // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + imm = emitGetInsSC(id); + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ttttt + code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg + code |= insEncodeReg_R_9_to_5(id->idReg3()); // nnnnn + code |= insEncodeSimm4_19_to_16(imm); // iiii + code |= insEncodeSveElemsize_22_to_21(optGetSveElemsize(id->idInsOpt())); // xx + dst += emitOutput_Instr(dst, code); + break; //case IF_SVE_JN_3B: // ..........x.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) // code = emitInsCodeSve(ins, fmt); @@ -19853,12 +19860,19 @@ void emitter::emitDispInsHelp( break; } - //case IF_SVE_JN_3A: // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) - // emitDispPredicateReg(id->idReg10(), PREDICATE_ZERO, id->idInsOpt(), true); // ggg - // emitDispSveReg(id->idReg20(), id->idInsOpt(), true); // ttttt - // emitDispReg(id->idReg30(), id->idInsOpt(), true); // nnnnn - // emitDispImm(); // iiii - // break; + case IF_SVE_JN_3A: // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + imm = emitGetInsSC(id); + emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg + printf("["); + emitDispReg(id->idReg3(), EA_8BYTE, imm != 0); // nnnnn + if (imm != 0) + { + emitDispImm(emitGetInsSC(id), true); // iiii + printf("mul vl"); + } + printf("]"); + break; //case IF_SVE_JN_3B: // ..........x.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) // emitDispPredicateReg(id->idReg10(), PREDICATE_ZERO, id->idInsOpt(), true); // ggg From 97b1bc0ed488295b535431206e323f7d0b968971 Mon Sep 17 00:00:00 2001 From: TIHan Date: Thu, 18 Jan 2024 16:25:41 -0800 Subject: [PATCH 12/17] Added SVE_JN_3B format --- src/coreclr/jit/codegenarm64test.cpp | 8 ++- src/coreclr/jit/emitarm64.cpp | 103 ++++++++++++++++----------- 2 files changed, 67 insertions(+), 44 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index b0a56a51ef5b1..7f5f85da03286 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5906,9 +5906,11 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R_I(INS_sve_st1h, EA_SCALABLE, REG_V0, REG_P3, REG_R4, -2, INS_OPTS_SCALABLE_D); // ST1H {.}, , [{, #, MUL VL}] - //// IF_SVE_JN_3B - //theEmitter->emitIns_R_R_R_I(INS_sve_st1w, EA_SCALABLE, REG_V2, REG_P1, REG_R3, 5, - // INS_OPTS_SCALABLE_B); // ST1W {.}, , [{, #, MUL VL}] + // IF_SVE_JN_3B + theEmitter->emitIns_R_R_R_I(INS_sve_st1w, EA_SCALABLE, REG_V2, REG_P1, REG_R3, 5, + INS_OPTS_SCALABLE_S); // ST1W {.}, , [{, #, MUL VL}] + theEmitter->emitIns_R_R_R_I(INS_sve_st1w, EA_SCALABLE, REG_V2, REG_P1, REG_R3, 1, + INS_OPTS_SCALABLE_D); // ST1W {.}, , [{, #, MUL VL}] } #endif // defined(TARGET_ARM64) && defined(DEBUG) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index dc455fa28c84f..8dded1b696e5e 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1503,7 +1503,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SVE_JN_3A: // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) imm = emitGetInsSC(id); elemsize = id->idOpSize(); - assert(insOptsScalable(id->idInsOpt())); + assert(insOptsScalableStandard(id->idInsOpt())); assert(isVectorRegister(id->idReg1())); // ttttt assert(isPredicateRegister(id->idReg2())); // ggg assert(isGeneralRegister(id->idReg3())); // nnnnn @@ -1511,15 +1511,16 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isValidSimm4(imm)); // iiii break; - //case IF_SVE_JN_3B: // ..........x.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) - // elemsize = id->idOpSize(); - // assert(insOptsScalable(id->idInsOpt())); - // assert(isPredicateRegister(id->idReg10())); // ggg - // assert(isVectorRegister(id->idReg20())); // ttttt - // assert(isValidGeneralRegister(id->idReg30())); // nnnnn - // assert(isValidImm()); // iiii - // assert(isValidVectorElemsize(id->idInsOpt())); // x - // break; + case IF_SVE_JN_3B: // ..........x.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + imm = emitGetInsSC(id); + elemsize = id->idOpSize(); + assert(insOptsScalableWords(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isScalableVectorSize(elemsize)); // x + assert(isValidSimm4(imm)); // iiii + break; default: printf("unexpected format %s\n", emitIfName(id->idInsFmt())); @@ -10359,17 +10360,24 @@ void emitter::emitIns_R_R_R_I(instruction ins, } else { -#if DEBUG - if (ins == INS_sve_st1w) + if ((ins == INS_sve_st1w) && insOptsScalableWords(opt)) { - assert(opt == INS_OPTS_SCALABLE_Q); + fmt = IF_SVE_JN_3B; } else { - assert(opt == INS_OPTS_SCALABLE_D); - } +#if DEBUG + if (ins == INS_sve_st1w) + { + assert(opt == INS_OPTS_SCALABLE_Q); + } + else + { + assert(opt == INS_OPTS_SCALABLE_D); + } #endif // DEBUG - fmt = IF_SVE_JN_3C; + fmt = IF_SVE_JN_3C; + } } break; @@ -10470,16 +10478,6 @@ void emitter::emitIns_R_R_R_I(instruction ins, fmt = IF_SVE_JN_3A; break; - //case INS_sve_st1w: - // assert(insOptsScalable(id->idInsOpt())); - // assert(isPredicateRegister(reg10)); // ggg - // assert(isVectorRegister(reg20)); // ttttt - // assert(isValidGeneralRegister(reg30)); // nnnnn - // assert(isValidImm()); // iiii - // assert(isValidVectorElemsize(opt)); // x - // fmt = IF_SVE_JN_3B; - // break; - default: unreached(); break; @@ -17063,15 +17061,16 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutput_Instr(dst, code); break; - //case IF_SVE_JN_3B: // ..........x.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) - // code = emitInsCodeSve(ins, fmt); - // code |= insEncodeReg_P_12_to_10(id->idReg10()); // ggg - // code |= insEncodeReg_V_4_to_0(id->idReg20()); // ttttt - // code |= insEncodeReg_R_9_to_5(id->idReg30()); // nnnnn - // code |= insEncodeImm(); // iiii - // code |= insEncodeSveElemsize(id->idInsOpt()); // x - // dst += emitOutput_Instr(dst, code); - // break; + case IF_SVE_JN_3B: // ..........x.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + imm = emitGetInsSC(id); + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ttttt + code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg + code |= insEncodeReg_R_9_to_5(id->idReg3()); // nnnnn + code |= insEncodeSimm4_19_to_16(imm); // iiii + code |= insEncodeSveElemsize_sz_21(optGetSveElemsize(id->idInsOpt())); // x + dst += emitOutput_Instr(dst, code); + break; default: assert(!"Unexpected format"); @@ -19768,6 +19767,8 @@ void emitter::emitDispInsHelp( printf("]"); break; + // {.}, , [, ] + // {.}, , [, , LSL #1] case IF_SVE_JD_4A: // .........xxmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, id->idInsOpt(), true); // ggg @@ -19785,6 +19786,7 @@ void emitter::emitDispInsHelp( } break; + // {.}, , [, , LSL #2] case IF_SVE_JD_4B: // ..........xmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, id->idInsOpt(), true); // ggg @@ -19794,16 +19796,26 @@ void emitter::emitDispInsHelp( printf("LSL #2]"); break; + // {.D }, , [, .D, #3] + // {.S }, , [, .S, #1] + // {.S }, , [, .S, #2] case IF_SVE_JJ_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled // offsets) + // {.D }, , [, .D, ] + // {.D }, , [, .D, #1] + // {.D }, , [, .D, #2] case IF_SVE_JJ_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled // offsets) + // {.D }, , [, .D, ] case IF_SVE_JJ_4A_C: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled // offsets) + // {.S }, , [, .S, ] case IF_SVE_JJ_4A_D: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled // offsets) + // {.D }, , [, .D, ] case IF_SVE_JK_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled // offsets) + // {.S }, , [, .S, ] case IF_SVE_JK_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit // unscaled offsets) { @@ -19860,6 +19872,7 @@ void emitter::emitDispInsHelp( break; } + // {.}, , [{, #, MUL VL}] case IF_SVE_JN_3A: // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) imm = emitGetInsSC(id); emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt @@ -19874,12 +19887,20 @@ void emitter::emitDispInsHelp( printf("]"); break; - //case IF_SVE_JN_3B: // ..........x.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) - // emitDispPredicateReg(id->idReg10(), PREDICATE_ZERO, id->idInsOpt(), true); // ggg - // emitDispSveReg(id->idReg20(), id->idInsOpt(), true); // ttttt - // emitDispReg(id->idReg30(), id->idInsOpt(), true); // nnnnn - // emitDispImm(); // iiii - // break; + // {.}, , [{, #, MUL VL}] + case IF_SVE_JN_3B: // ..........x.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + imm = emitGetInsSC(id); + emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg + printf("["); + emitDispReg(id->idReg3(), EA_8BYTE, imm != 0); // nnnnn + if (imm != 0) + { + emitDispImm(emitGetInsSC(id), true); // iiii + printf("mul vl"); + } + printf("]"); + break; default: printf("unexpected format %s", emitIfName(id->idInsFmt())); From 4f393933c3ce0daf6651366dd32a2eb1fee0ee7c Mon Sep 17 00:00:00 2001 From: TIHan Date: Thu, 18 Jan 2024 16:28:15 -0800 Subject: [PATCH 13/17] Some comments --- src/coreclr/jit/instr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index 1c06be8ceed90..78ba652fc5707 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -322,8 +322,8 @@ enum insScalableOpts : unsigned INS_SCALABLE_OPTS_VL_2X, // Variants with a vector length specifier of 2x (eg whilege) INS_SCALABLE_OPTS_VL_4X, // Variants with a vector length specifier of 4x (eg whilege) - INS_SCALABLE_OPTS_LSL_N, - INS_SCALABLE_OPTS_MOD_N, + INS_SCALABLE_OPTS_LSL_N, // Variants with a LSL #N (eg {.}, , [, , LSL #2]) + INS_SCALABLE_OPTS_MOD_N, // Variants with a #N (eg {.S }, , [, .S, #2]) }; enum insCond : unsigned From 6be93fe363667f6ba826ea77f48962183679a089 Mon Sep 17 00:00:00 2001 From: TIHan Date: Mon, 22 Jan 2024 10:36:18 -0800 Subject: [PATCH 14/17] Feedback --- src/coreclr/jit/emitarm64.cpp | 211 ++++------------------------------ 1 file changed, 21 insertions(+), 190 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index a83e670d4426f..a6410bcea8c0c 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1508,12 +1508,9 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isGeneralRegister(id->idReg3())); // nnnnn assert(isGeneralRegister(id->idReg4())); // mmmmm assert(isScalableVectorSize(elemsize)); // xx -#ifdef DEBUG - if ((id->idIns() == INS_sve_st1h) && (id->idInsOpt() == INS_OPTS_SCALABLE_B)) - { - assert(!"sve_st1h with scalable B is reserved"); - } -#endif // DEBUG + // st1h is reserved for scalable B + assert((id->idIns() == INS_sve_st1h) ? insOptsScalableAtLeastHalf(id->idInsOpt()) + : insOptsScalableStandard(id->idInsOpt())); break; case IF_SVE_JD_4B: // ..........xmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) @@ -1538,8 +1535,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) // offsets) case IF_SVE_JK_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit // unscaled offsets) - elemsize = id->idOpSize(); - assert(insOptsScalableStandard(id->idInsOpt()) || insOptsScalable32bitExtends(id->idInsOpt())); + assert(insOptsScalable32bitExtends(id->idInsOpt())); assert(isVectorRegister(id->idReg1())); // ttttt assert(isPredicateRegister(id->idReg2())); // ggg assert(isGeneralRegister(id->idReg3())); // nnnnn @@ -10614,12 +10610,8 @@ void emitter::emitIns_R_R_R_I(instruction ins, assert(isPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); assert(isValidSimm4(imm)); -#ifdef DEBUG - if ((ins == INS_sve_st1h) && (opt == INS_OPTS_SCALABLE_B)) - { - assert(!"sve_st1h with scalable B is reserved"); - } -#endif // DEBUG + // st1h is reserved for scalable B + assert((ins == INS_sve_st1h) ? insOptsScalableAtLeastHalf(opt) : insOptsScalableStandard(opt)); fmt = IF_SVE_JN_3A; break; @@ -11190,12 +11182,9 @@ void emitter::emitIns_R_R_R_R(instruction ins, assert(isPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); assert(isScalableVectorSize(size)); -#ifdef DEBUG - if (opt == INS_OPTS_SCALABLE_B) - { - assert(!"sve_st1h with scalable B is reserved"); - } -#endif // DEBUG + // st1h is reserved for scalable B + assert((ins == INS_sve_st1h) ? insOptsScalableAtLeastHalf(opt) + : insOptsScalableStandard(opt)); if (insOptsScalableStandard(opt)) { assert(isGeneralRegister(reg4)); @@ -17725,12 +17714,12 @@ void emitter::emitDispSveExtendOpts(insOpts opt) { case INS_OPTS_SCALABLE_S_UXTW: case INS_OPTS_SCALABLE_D_UXTW: - printf("UXTW"); + printf("uxtw"); break; case INS_OPTS_SCALABLE_S_SXTW: case INS_OPTS_SCALABLE_D_SXTW: - printf("SXTW"); + printf("sxtw"); break; default: @@ -20031,7 +20020,7 @@ void emitter::emitDispInsHelp( if (ins == INS_sve_st1h) { emitDispReg(id->idReg4(), EA_8BYTE, true); // mmmmm - printf("LSL #1]"); + printf("lsl #1]"); } else { @@ -20047,7 +20036,7 @@ void emitter::emitDispInsHelp( printf("["); emitDispReg(id->idReg3(), EA_8BYTE, true); // nnnnn emitDispReg(id->idReg4(), EA_8BYTE, true); // mmmmm - printf("LSL #2]"); + printf("lsl #2]"); break; // {.D }, , [, .D, #3] @@ -23016,182 +23005,24 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins } break; - case IF_SVE_JD_4A: // .........xxmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) - switch (ins) - { - case INS_sve_st1b: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; - case INS_sve_st1h: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; - } - break; - - case IF_SVE_JD_4B: // ..........xmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) - switch (ins) - { - case INS_sve_st1w: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; - } - break; - - case IF_SVE_JJ_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled - // offsets) - switch (ins) - { - case INS_sve_st1h: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; - case INS_sve_st1w: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; - case INS_sve_st1d: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; - } - break; - + case IF_SVE_JD_4A: // .........xxmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) + case IF_SVE_JD_4B: // ..........xmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) + case IF_SVE_JJ_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) case IF_SVE_JJ_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled // offsets) - switch (ins) - { - case INS_sve_st1h: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; - case INS_sve_st1w: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; - case INS_sve_st1d: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; - } - break; - case IF_SVE_JJ_4A_C: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled // offsets) - switch (ins) - { - case INS_sve_st1h: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; - case INS_sve_st1w: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; - } - break; - case IF_SVE_JJ_4A_D: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled // offsets) - switch (ins) - { - case INS_sve_st1h: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; - case INS_sve_st1w: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; - } - break; - case IF_SVE_JK_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled // offsets) - switch (ins) - { - case INS_sve_st1b: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; - } - break; - case IF_SVE_JK_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit // unscaled offsets) - switch (ins) - { - case INS_sve_st1b: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; - } - break; - - case IF_SVE_JN_3A: // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) - switch (ins) - { - case INS_sve_st1b: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; - case INS_sve_st1h: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; - } - break; - - case IF_SVE_JN_3B: // ..........x.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) - switch (ins) - { - case INS_sve_st1w: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; - } + case IF_SVE_JN_3A: // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + case IF_SVE_JN_3B: // ..........x.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; break; default: From bb6dbf52bcdcc935516fa73c707251cac6072090 Mon Sep 17 00:00:00 2001 From: TIHan Date: Mon, 22 Jan 2024 10:41:32 -0800 Subject: [PATCH 15/17] Feedback --- src/coreclr/jit/emitarm64.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index a6410bcea8c0c..f4555afced0e1 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1502,7 +1502,6 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SVE_JD_4A: // .........xxmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) elemsize = id->idOpSize(); - assert(insOptsScalableStandard(id->idInsOpt())); assert(isVectorRegister(id->idReg1())); // ttttt assert(isPredicateRegister(id->idReg2())); // ggg assert(isGeneralRegister(id->idReg3())); // nnnnn @@ -10605,7 +10604,6 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_st1b: case INS_sve_st1h: - assert(insOptsScalableStandard(opt)); assert(isVectorRegister(reg1)); assert(isPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); From bb526304002292f861928529e15ebcbdb877cdfc Mon Sep 17 00:00:00 2001 From: TIHan Date: Mon, 22 Jan 2024 12:32:01 -0800 Subject: [PATCH 16/17] fix build --- src/coreclr/jit/emitarm64.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index f4555afced0e1..a2c2a9175fceb 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1534,6 +1534,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) // offsets) case IF_SVE_JK_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit // unscaled offsets) + elemsize = id->idOpSize(); assert(insOptsScalable32bitExtends(id->idInsOpt())); assert(isVectorRegister(id->idReg1())); // ttttt assert(isPredicateRegister(id->idReg2())); // ggg @@ -11180,11 +11181,10 @@ void emitter::emitIns_R_R_R_R(instruction ins, assert(isPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); assert(isScalableVectorSize(size)); - // st1h is reserved for scalable B - assert((ins == INS_sve_st1h) ? insOptsScalableAtLeastHalf(opt) - : insOptsScalableStandard(opt)); if (insOptsScalableStandard(opt)) { + // st1h is reserved for scalable B + assert((ins == INS_sve_st1h) ? insOptsScalableAtLeastHalf(opt) : true); assert(isGeneralRegister(reg4)); assert(sopt == INS_SCALABLE_OPTS_LSL_N); fmt = IF_SVE_JD_4A; From 35bbf18036561615cc52a02f68abef6f4e7b480d Mon Sep 17 00:00:00 2001 From: TIHan Date: Mon, 22 Jan 2024 14:17:24 -0800 Subject: [PATCH 17/17] Formatting --- src/coreclr/jit/codegenarm64test.cpp | 12 ++-- src/coreclr/jit/emitarm64.cpp | 99 ++++++++++++++-------------- 2 files changed, 57 insertions(+), 54 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 68bc7727cca0d..f7c9567a35872 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5933,10 +5933,14 @@ void CodeGen::genArm64EmitterUnitTestsSve() INS_SCALABLE_OPTS_MOD_N); // ST1W {.D }, , [, .D, #2] // IF_SVE_JJ_4A_C - theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V1, REG_P5, REG_R1, REG_V3, INS_OPTS_SCALABLE_D_UXTW); // ST1H {.D }, , [, .D, ] - theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V1, REG_P5, REG_R1, REG_V3, INS_OPTS_SCALABLE_D_SXTW); // ST1H {.D }, , [, .D, ] - theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V0, REG_P2, REG_R3, REG_V4, INS_OPTS_SCALABLE_D_UXTW); // ST1W {.D }, , [, .D, ] - theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V0, REG_P2, REG_R3, REG_V4, INS_OPTS_SCALABLE_D_SXTW); // ST1W {.D }, , [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V1, REG_P5, REG_R1, REG_V3, + INS_OPTS_SCALABLE_D_UXTW); // ST1H {.D }, , [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V1, REG_P5, REG_R1, REG_V3, + INS_OPTS_SCALABLE_D_SXTW); // ST1H {.D }, , [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V0, REG_P2, REG_R3, REG_V4, + INS_OPTS_SCALABLE_D_UXTW); // ST1W {.D }, , [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V0, REG_P2, REG_R3, REG_V4, + INS_OPTS_SCALABLE_D_SXTW); // ST1W {.D }, , [, .D, ] // IF_SVE_JJ_4A_D theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V7, REG_P5, REG_R4, REG_V1, diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index a2c2a9175fceb..42ea4d031eb81 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -20034,14 +20034,14 @@ void emitter::emitDispInsHelp( printf("["); emitDispReg(id->idReg3(), EA_8BYTE, true); // nnnnn emitDispReg(id->idReg4(), EA_8BYTE, true); // mmmmm - printf("lsl #2]"); + printf("lsl #2]"); break; // {.D }, , [, .D, #3] // {.S }, , [, .S, #1] // {.S }, , [, .S, #2] - case IF_SVE_JJ_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled - // offsets) + case IF_SVE_JJ_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) // {.D }, , [, .D, ] // {.D }, , [, .D, #1] // {.D }, , [, .D, #2] @@ -20059,59 +20059,58 @@ void emitter::emitDispInsHelp( // {.S }, , [, .S, ] case IF_SVE_JK_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit // unscaled offsets) - { - emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), - true); // ttttt - emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, id->idInsOpt(), true); // ggg - printf("["); - emitDispReg(id->idReg3(), EA_8BYTE, true); // nnnnn - emitDispSveReg(id->idReg4(), id->idInsOpt(), true); // mmmmm - emitDispSveExtendOpts(id->idInsOpt()); - switch (ins) { - case INS_sve_st1b: - printf("]"); - break; - - case INS_sve_st1h: - if ((fmt == IF_SVE_JJ_4A_C) || (fmt == IF_SVE_JJ_4A_D)) - { + emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt + emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, id->idInsOpt(), true); // ggg + printf("["); + emitDispReg(id->idReg3(), EA_8BYTE, true); // nnnnn + emitDispSveReg(id->idReg4(), id->idInsOpt(), true); // mmmmm + emitDispSveExtendOpts(id->idInsOpt()); + switch (ins) + { + case INS_sve_st1b: printf("]"); - } - else - { - printf(" #1]"); - } - break; + break; - case INS_sve_st1w: - if ((fmt == IF_SVE_JJ_4A_C) || (fmt == IF_SVE_JJ_4A_D)) - { - printf("]"); - } - else - { - printf(" #2]"); - } - break; + case INS_sve_st1h: + if ((fmt == IF_SVE_JJ_4A_C) || (fmt == IF_SVE_JJ_4A_D)) + { + printf("]"); + } + else + { + printf(" #1]"); + } + break; - case INS_sve_st1d: - if (fmt == IF_SVE_JJ_4A_B) - { - printf("]"); - } - else - { - printf(" #3]"); - } - break; + case INS_sve_st1w: + if ((fmt == IF_SVE_JJ_4A_C) || (fmt == IF_SVE_JJ_4A_D)) + { + printf("]"); + } + else + { + printf(" #2]"); + } + break; - default: - assert(!"Invalid instruction"); - break; + case INS_sve_st1d: + if (fmt == IF_SVE_JJ_4A_B) + { + printf("]"); + } + else + { + printf(" #3]"); + } + break; + + default: + assert(!"Invalid instruction"); + break; + } + break; } - break; - } // {.}, , [{, #, MUL VL}] case IF_SVE_JN_3A: // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)