From 562a10ca39bda14018e1ebee7a782b46d7b7bcb6 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Thu, 3 Sep 2020 01:26:27 +0300 Subject: [PATCH 01/12] Use rorx instead of rol when possible --- src/coreclr/src/jit/codegenxarch.cpp | 30 +++++++++++++++++++--------- src/coreclr/src/jit/emitxarch.cpp | 4 ++++ src/coreclr/src/jit/instrsxarch.h | 1 + 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/src/coreclr/src/jit/codegenxarch.cpp b/src/coreclr/src/jit/codegenxarch.cpp index 7e63dd6a4d64c5..34866d7d87781c 100644 --- a/src/coreclr/src/jit/codegenxarch.cpp +++ b/src/coreclr/src/jit/codegenxarch.cpp @@ -4067,10 +4067,11 @@ void CodeGen::genCodeForShift(GenTree* tree) if (shiftBy->isContainedIntOrIImmed()) { + emitAttr size = emitTypeSize(tree); + // Optimize "X<<1" to "lea [reg+reg]" or "add reg, reg" if (tree->OperIs(GT_LSH) && !tree->gtOverflowEx() && !tree->gtSetFlags() && shiftBy->IsIntegralConst(1)) { - emitAttr size = emitTypeSize(tree); if (tree->GetRegNum() == operandReg) { GetEmitter()->emitIns_R_R(INS_add, size, tree->GetRegNum(), operandReg); @@ -4082,16 +4083,27 @@ void CodeGen::genCodeForShift(GenTree* tree) } else { - // First, move the operand to the destination register and - // later on perform the shift in-place. - // (LSRA will try to avoid this situation through preferencing.) - if (tree->GetRegNum() != operandReg) + int typeWidth = genTypeSize(genActualType(targetType)) * 8; + int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue(); + if (tree->OperIs(GT_ROL) && (shiftByValue > 0) && (shiftByValue < typeWidth)) { - inst_RV_RV(INS_mov, tree->GetRegNum(), operandReg, targetType); - } + assert((typeWidth == 32) || (typeWidth == 64)); - int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue(); - inst_RV_SH(ins, emitTypeSize(tree), tree->GetRegNum(), shiftByValue); + // TODO: check if BMI2 is available + int reversedValue = typeWidth - shiftByValue; + GetEmitter()->emitIns_R_R_I(INS_rorx, size, tree->GetRegNum(), operandReg, reversedValue); + } + else + { + // First, move the operand to the destination register and + // later on perform the shift in-place. + // (LSRA will try to avoid this situation through preferencing.) + if (tree->GetRegNum() != operandReg) + { + inst_RV_RV(INS_mov, tree->GetRegNum(), operandReg, targetType); + } + inst_RV_SH(ins, size, tree->GetRegNum(), shiftByValue); + } } } else diff --git a/src/coreclr/src/jit/emitxarch.cpp b/src/coreclr/src/jit/emitxarch.cpp index f21b1170eb83cc..08197ccd55f3d6 100644 --- a/src/coreclr/src/jit/emitxarch.cpp +++ b/src/coreclr/src/jit/emitxarch.cpp @@ -524,6 +524,7 @@ bool TakesRexWPrefix(instruction ins, emitAttr attr) case INS_mulx: case INS_pdep: case INS_pext: + case INS_rorx: return true; default: return false; @@ -758,6 +759,7 @@ unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, c { switch (ins) { + case INS_rorx: case INS_pdep: case INS_mulx: { @@ -1242,6 +1244,7 @@ bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id) case INS_pextrq: case INS_pextrw: case INS_pextrw_sse41: + case INS_rorx: { // These SSE instructions write to a general purpose integer register. return false; @@ -14938,6 +14941,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case INS_tzcnt: case INS_popcnt: case INS_crc32: + case INS_rorx: case INS_pdep: case INS_pext: case INS_addsubps: diff --git a/src/coreclr/src/jit/instrsxarch.h b/src/coreclr/src/jit/instrsxarch.h index 60616c5177b58f..c6a50690f70290 100644 --- a/src/coreclr/src/jit/instrsxarch.h +++ b/src/coreclr/src/jit/instrsxarch.h @@ -594,6 +594,7 @@ INST3(blsr, "blsr", IUM_WR, BAD_CODE, BAD_CODE, INST3(bextr, "bextr", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF7), INS_Flags_IsDstDstSrcAVXInstruction) // Bit Field Extract // BMI2 +INST3(rorx, "rorx", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xF0), INS_FLAGS_None) INST3(pdep, "pdep", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_Flags_IsDstDstSrcAVXInstruction) // Parallel Bits Deposit INST3(pext, "pext", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_Flags_IsDstDstSrcAVXInstruction) // Parallel Bits Extract INST3(bzhi, "bzhi", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_Flags_IsDstDstSrcAVXInstruction) // Zero High Bits Starting with Specified Bit Position From 928c55dfcea076827bd96ca10bda6040e1783e4a Mon Sep 17 00:00:00 2001 From: EgorBo Date: Thu, 3 Sep 2020 11:42:56 +0300 Subject: [PATCH 02/12] check for BMI2 availability --- src/coreclr/src/jit/codegenxarch.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/src/jit/codegenxarch.cpp b/src/coreclr/src/jit/codegenxarch.cpp index 34866d7d87781c..d71095147de206 100644 --- a/src/coreclr/src/jit/codegenxarch.cpp +++ b/src/coreclr/src/jit/codegenxarch.cpp @@ -4085,11 +4085,11 @@ void CodeGen::genCodeForShift(GenTree* tree) { int typeWidth = genTypeSize(genActualType(targetType)) * 8; int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue(); - if (tree->OperIs(GT_ROL) && (shiftByValue > 0) && (shiftByValue < typeWidth)) + if (compiler->compExactlyDependsOn(InstructionSet_BMI2) && tree->OperIs(GT_ROL) && + (shiftByValue > 0) && (shiftByValue < typeWidth)) { assert((typeWidth == 32) || (typeWidth == 64)); - // TODO: check if BMI2 is available int reversedValue = typeWidth - shiftByValue; GetEmitter()->emitIns_R_R_I(INS_rorx, size, tree->GetRegNum(), operandReg, reversedValue); } From ab495fc1f4728aa6cfaedcd939b7f40ce3df7cac Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Thu, 3 Sep 2020 16:16:59 +0300 Subject: [PATCH 03/12] Update codegenxarch.cpp --- src/coreclr/src/jit/codegenxarch.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/src/jit/codegenxarch.cpp b/src/coreclr/src/jit/codegenxarch.cpp index d71095147de206..14a68e7bdf0b5f 100644 --- a/src/coreclr/src/jit/codegenxarch.cpp +++ b/src/coreclr/src/jit/codegenxarch.cpp @@ -4085,8 +4085,8 @@ void CodeGen::genCodeForShift(GenTree* tree) { int typeWidth = genTypeSize(genActualType(targetType)) * 8; int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue(); - if (compiler->compExactlyDependsOn(InstructionSet_BMI2) && tree->OperIs(GT_ROL) && - (shiftByValue > 0) && (shiftByValue < typeWidth)) + if (compiler->compExactlyDependsOn(InstructionSet_BMI2) && tree->OperIs(GT_ROL) && (shiftByValue > 0) && + (shiftByValue < typeWidth)) { assert((typeWidth == 32) || (typeWidth == 64)); From ddb338220257dae240d7c9fa1166842695b52288 Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Thu, 3 Sep 2020 19:05:45 +0300 Subject: [PATCH 04/12] Update codegenxarch.cpp --- src/coreclr/src/jit/codegenxarch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/src/jit/codegenxarch.cpp b/src/coreclr/src/jit/codegenxarch.cpp index 14a68e7bdf0b5f..cece24eb3ce822 100644 --- a/src/coreclr/src/jit/codegenxarch.cpp +++ b/src/coreclr/src/jit/codegenxarch.cpp @@ -4085,7 +4085,7 @@ void CodeGen::genCodeForShift(GenTree* tree) { int typeWidth = genTypeSize(genActualType(targetType)) * 8; int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue(); - if (compiler->compExactlyDependsOn(InstructionSet_BMI2) && tree->OperIs(GT_ROL) && (shiftByValue > 0) && + if (compiler->compOpportunisticallyDependsOn(InstructionSet_BMI2) && tree->OperIs(GT_ROL) && (shiftByValue > 0) && (shiftByValue < typeWidth)) { assert((typeWidth == 32) || (typeWidth == 64)); From b85c87bcb3e6bac0faeb6016683ed26d334c2f78 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Thu, 3 Sep 2020 20:21:23 +0300 Subject: [PATCH 05/12] formatting --- src/coreclr/src/jit/codegenxarch.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/coreclr/src/jit/codegenxarch.cpp b/src/coreclr/src/jit/codegenxarch.cpp index cece24eb3ce822..c5acc04673c217 100644 --- a/src/coreclr/src/jit/codegenxarch.cpp +++ b/src/coreclr/src/jit/codegenxarch.cpp @@ -4085,8 +4085,10 @@ void CodeGen::genCodeForShift(GenTree* tree) { int typeWidth = genTypeSize(genActualType(targetType)) * 8; int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue(); - if (compiler->compOpportunisticallyDependsOn(InstructionSet_BMI2) && tree->OperIs(GT_ROL) && (shiftByValue > 0) && - (shiftByValue < typeWidth)) + + // Try to emit rorx if BMI is available instead of rol + if (compiler->compOpportunisticallyDependsOn(InstructionSet_BMI2) && tree->OperIs(GT_ROL) && + (shiftByValue > 0) && (shiftByValue < typeWidth)) { assert((typeWidth == 32) || (typeWidth == 64)); From 739007b21fe7e8b99113088f25b1d52fadc847d5 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Fri, 4 Sep 2020 22:00:40 +0300 Subject: [PATCH 06/12] handle GT_ROR --- src/coreclr/src/jit/codegenxarch.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/src/jit/codegenxarch.cpp b/src/coreclr/src/jit/codegenxarch.cpp index c5acc04673c217..363b5a3b1ebe6c 100644 --- a/src/coreclr/src/jit/codegenxarch.cpp +++ b/src/coreclr/src/jit/codegenxarch.cpp @@ -4087,13 +4087,13 @@ void CodeGen::genCodeForShift(GenTree* tree) int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue(); // Try to emit rorx if BMI is available instead of rol - if (compiler->compOpportunisticallyDependsOn(InstructionSet_BMI2) && tree->OperIs(GT_ROL) && + if (compiler->compOpportunisticallyDependsOn(InstructionSet_BMI2) && tree->OperIs(GT_ROL, GT_ROR) && (shiftByValue > 0) && (shiftByValue < typeWidth)) { assert((typeWidth == 32) || (typeWidth == 64)); - int reversedValue = typeWidth - shiftByValue; - GetEmitter()->emitIns_R_R_I(INS_rorx, size, tree->GetRegNum(), operandReg, reversedValue); + int value = tree->OperIs(GT_ROL) ? typeWidth - shiftByValue : shiftByValue; + GetEmitter()->emitIns_R_R_I(INS_rorx, size, tree->GetRegNum(), operandReg, value); } else { From c5933e93bae1eeedf711939313b624785c8d2ce6 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Fri, 4 Sep 2020 22:04:28 +0300 Subject: [PATCH 07/12] clean up --- src/coreclr/src/jit/codegenxarch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/src/jit/codegenxarch.cpp b/src/coreclr/src/jit/codegenxarch.cpp index 363b5a3b1ebe6c..1454ed7fcbdb84 100644 --- a/src/coreclr/src/jit/codegenxarch.cpp +++ b/src/coreclr/src/jit/codegenxarch.cpp @@ -4092,7 +4092,7 @@ void CodeGen::genCodeForShift(GenTree* tree) { assert((typeWidth == 32) || (typeWidth == 64)); - int value = tree->OperIs(GT_ROL) ? typeWidth - shiftByValue : shiftByValue; + int value = tree->OperIs(GT_ROL) ? (typeWidth - shiftByValue) : shiftByValue; GetEmitter()->emitIns_R_R_I(INS_rorx, size, tree->GetRegNum(), operandReg, value); } else From 609af9e0c4d2d73198ca9a3bc4c2086579c79b84 Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Sat, 5 Sep 2020 21:10:44 +0300 Subject: [PATCH 08/12] Don't emit rorx when targetReg == operandReg --- src/coreclr/src/jit/codegenxarch.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/src/jit/codegenxarch.cpp b/src/coreclr/src/jit/codegenxarch.cpp index 1454ed7fcbdb84..78dd2f4cb7e895 100644 --- a/src/coreclr/src/jit/codegenxarch.cpp +++ b/src/coreclr/src/jit/codegenxarch.cpp @@ -4086,9 +4086,9 @@ void CodeGen::genCodeForShift(GenTree* tree) int typeWidth = genTypeSize(genActualType(targetType)) * 8; int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue(); - // Try to emit rorx if BMI is available instead of rol - if (compiler->compOpportunisticallyDependsOn(InstructionSet_BMI2) && tree->OperIs(GT_ROL, GT_ROR) && - (shiftByValue > 0) && (shiftByValue < typeWidth)) + // Try to emit rorx if BMI2 is available instead of mov+rol + if (compiler->compOpportunisticallyDependsOn(InstructionSet_BMI2) && (tree->GetRegNum() != operandReg) && + tree->OperIs(GT_ROL, GT_ROR) && (shiftByValue > 0) && (shiftByValue < typeWidth)) { assert((typeWidth == 32) || (typeWidth == 64)); From 5a36432fdc46ea9470c5a2ccd109dec59243accb Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 9 Sep 2020 18:27:41 +0300 Subject: [PATCH 09/12] emit rorx only for long integers --- src/coreclr/src/jit/codegenxarch.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/coreclr/src/jit/codegenxarch.cpp b/src/coreclr/src/jit/codegenxarch.cpp index 78dd2f4cb7e895..dcbd0aee207f87 100644 --- a/src/coreclr/src/jit/codegenxarch.cpp +++ b/src/coreclr/src/jit/codegenxarch.cpp @@ -4083,16 +4083,15 @@ void CodeGen::genCodeForShift(GenTree* tree) } else { - int typeWidth = genTypeSize(genActualType(targetType)) * 8; int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue(); // Try to emit rorx if BMI2 is available instead of mov+rol - if (compiler->compOpportunisticallyDependsOn(InstructionSet_BMI2) && (tree->GetRegNum() != operandReg) && - tree->OperIs(GT_ROL, GT_ROR) && (shiftByValue > 0) && (shiftByValue < typeWidth)) + // it makes sense only for 64bit integers + if ((genActualType(targetType) == TYP_LONG) && (tree->GetRegNum() != operandReg) && + compiler->compOpportunisticallyDependsOn(InstructionSet_BMI2) && tree->OperIs(GT_ROL, GT_ROR) && + (shiftByValue > 0) && (shiftByValue < 64)) { - assert((typeWidth == 32) || (typeWidth == 64)); - - int value = tree->OperIs(GT_ROL) ? (typeWidth - shiftByValue) : shiftByValue; + const int value = tree->OperIs(GT_ROL) ? (64 - shiftByValue) : shiftByValue; GetEmitter()->emitIns_R_R_I(INS_rorx, size, tree->GetRegNum(), operandReg, value); } else From da9d166e70c75850ac8583935c3820387283353c Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 9 Sep 2020 18:33:02 +0300 Subject: [PATCH 10/12] wrap with TARGET_64BIT --- src/coreclr/src/jit/codegenxarch.cpp | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/coreclr/src/jit/codegenxarch.cpp b/src/coreclr/src/jit/codegenxarch.cpp index dcbd0aee207f87..a540eba7eeb1f3 100644 --- a/src/coreclr/src/jit/codegenxarch.cpp +++ b/src/coreclr/src/jit/codegenxarch.cpp @@ -4083,7 +4083,8 @@ void CodeGen::genCodeForShift(GenTree* tree) } else { - int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue(); +#if defined(TARGET_64BIT) + const int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue(); // Try to emit rorx if BMI2 is available instead of mov+rol // it makes sense only for 64bit integers @@ -4093,18 +4094,18 @@ void CodeGen::genCodeForShift(GenTree* tree) { const int value = tree->OperIs(GT_ROL) ? (64 - shiftByValue) : shiftByValue; GetEmitter()->emitIns_R_R_I(INS_rorx, size, tree->GetRegNum(), operandReg, value); + genProduceReg(tree); + return; } - else +#endif + // First, move the operand to the destination register and + // later on perform the shift in-place. + // (LSRA will try to avoid this situation through preferencing.) + if (tree->GetRegNum() != operandReg) { - // First, move the operand to the destination register and - // later on perform the shift in-place. - // (LSRA will try to avoid this situation through preferencing.) - if (tree->GetRegNum() != operandReg) - { - inst_RV_RV(INS_mov, tree->GetRegNum(), operandReg, targetType); - } - inst_RV_SH(ins, size, tree->GetRegNum(), shiftByValue); + inst_RV_RV(INS_mov, tree->GetRegNum(), operandReg, targetType); } + inst_RV_SH(ins, size, tree->GetRegNum(), shiftByValue); } } else From 7cbb1ce9824e56b36d66db1416f3f595a9cacd6f Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 9 Sep 2020 18:34:42 +0300 Subject: [PATCH 11/12] clean up --- src/coreclr/src/jit/codegenxarch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/src/jit/codegenxarch.cpp b/src/coreclr/src/jit/codegenxarch.cpp index a540eba7eeb1f3..add6b9a9eadbcf 100644 --- a/src/coreclr/src/jit/codegenxarch.cpp +++ b/src/coreclr/src/jit/codegenxarch.cpp @@ -4083,8 +4083,8 @@ void CodeGen::genCodeForShift(GenTree* tree) } else { + int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue(); #if defined(TARGET_64BIT) - const int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue(); // Try to emit rorx if BMI2 is available instead of mov+rol // it makes sense only for 64bit integers From 9e9b1cb2c6483c340a182eb23497ce5363ea6bce Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 9 Sep 2020 18:35:39 +0300 Subject: [PATCH 12/12] clean up --- src/coreclr/src/jit/codegenxarch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/src/jit/codegenxarch.cpp b/src/coreclr/src/jit/codegenxarch.cpp index add6b9a9eadbcf..66759129707363 100644 --- a/src/coreclr/src/jit/codegenxarch.cpp +++ b/src/coreclr/src/jit/codegenxarch.cpp @@ -4084,8 +4084,8 @@ void CodeGen::genCodeForShift(GenTree* tree) else { int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue(); -#if defined(TARGET_64BIT) +#if defined(TARGET_64BIT) // Try to emit rorx if BMI2 is available instead of mov+rol // it makes sense only for 64bit integers if ((genActualType(targetType) == TYP_LONG) && (tree->GetRegNum() != operandReg) &&