Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RyuJIT] Add "rorx" instruction (BMI2) and emit it instead of "rol" when possible #41772

Merged
merged 15 commits into from
Sep 29, 2020
Merged
30 changes: 21 additions & 9 deletions src/coreclr/src/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4067,10 +4067,11 @@ void CodeGen::genCodeForShift(GenTree* tree)

if (shiftBy->isContainedIntOrIImmed())
{
emitAttr size = emitTypeSize(tree);

// Optimize "X<<1" to "lea [reg+reg]" or "add reg, reg"
if (tree->OperIs(GT_LSH) && !tree->gtOverflowEx() && !tree->gtSetFlags() && shiftBy->IsIntegralConst(1))
{
emitAttr size = emitTypeSize(tree);
if (tree->GetRegNum() == operandReg)
{
GetEmitter()->emitIns_R_R(INS_add, size, tree->GetRegNum(), operandReg);
Expand All @@ -4082,16 +4083,27 @@ void CodeGen::genCodeForShift(GenTree* tree)
}
else
{
// First, move the operand to the destination register and
// later on perform the shift in-place.
// (LSRA will try to avoid this situation through preferencing.)
if (tree->GetRegNum() != operandReg)
int typeWidth = genTypeSize(genActualType(targetType)) * 8;
int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue();
if (compiler->compOpportunisticallyDependsOn(InstructionSet_BMI2) && tree->OperIs(GT_ROL) && (shiftByValue > 0) &&
(shiftByValue < typeWidth))
{
inst_RV_RV(INS_mov, tree->GetRegNum(), operandReg, targetType);
}
assert((typeWidth == 32) || (typeWidth == 64));

int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue();
inst_RV_SH(ins, emitTypeSize(tree), tree->GetRegNum(), shiftByValue);
int reversedValue = typeWidth - shiftByValue;
GetEmitter()->emitIns_R_R_I(INS_rorx, size, tree->GetRegNum(), operandReg, reversedValue);
}
else
{
// First, move the operand to the destination register and
// later on perform the shift in-place.
// (LSRA will try to avoid this situation through preferencing.)
if (tree->GetRegNum() != operandReg)
{
inst_RV_RV(INS_mov, tree->GetRegNum(), operandReg, targetType);
}
inst_RV_SH(ins, size, tree->GetRegNum(), shiftByValue);
}
}
}
else
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/src/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,7 @@ bool TakesRexWPrefix(instruction ins, emitAttr attr)
case INS_mulx:
case INS_pdep:
case INS_pext:
case INS_rorx:
return true;
default:
return false;
Expand Down Expand Up @@ -758,6 +759,7 @@ unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, c
{
switch (ins)
{
case INS_rorx:
case INS_pdep:
case INS_mulx:
{
Expand Down Expand Up @@ -1242,6 +1244,7 @@ bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id)
case INS_pextrq:
case INS_pextrw:
case INS_pextrw_sse41:
case INS_rorx:
{
// These SSE instructions write to a general purpose integer register.
return false;
Expand Down Expand Up @@ -14938,6 +14941,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_tzcnt:
case INS_popcnt:
case INS_crc32:
case INS_rorx:
case INS_pdep:
case INS_pext:
case INS_addsubps:
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/src/jit/instrsxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -594,6 +594,7 @@ INST3(blsr, "blsr", IUM_WR, BAD_CODE, BAD_CODE,
INST3(bextr, "bextr", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF7), INS_Flags_IsDstDstSrcAVXInstruction) // Bit Field Extract

// BMI2
INST3(rorx, "rorx", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xF0), INS_FLAGS_None)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be good to double-check that neither the IsDstDstSrcAVXInstruction nor IsDstSrcSrcAVXInstruction flags apply. The instruction looks to be a dst, src, imm operand in this case and so it shouldn't be flagged as RMW and should go down the right encoding paths in emit.

Copy link
Member Author

@EgorBo EgorBo Sep 3, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tried both of these flags and observed SEH exceptions on start 😞 with INS_FLAGS_None works like a charm.

INST3(pdep, "pdep", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_Flags_IsDstDstSrcAVXInstruction) // Parallel Bits Deposit
INST3(pext, "pext", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_Flags_IsDstDstSrcAVXInstruction) // Parallel Bits Extract
INST3(bzhi, "bzhi", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_Flags_IsDstDstSrcAVXInstruction) // Zero High Bits Starting with Specified Bit Position
Expand Down