Skip to content
This repository has been archived by the owner on Jan 23, 2023. It is now read-only.

Commit

Permalink
Optimize integer div/mod by const power of 2 in lowering
Browse files Browse the repository at this point in the history
Optimizing GT_DIV/GT_UDIV/GT_MOD/GT_UMOD by power of 2 in codegen is problematic because the xarch DIV instruction has special register requirements. By the time codegen decides to perform the optimization, the rax and rdx registers have already been allocated by LSRA even though they're not always needed (as happens in the case of unsigned division, where CDQ isn't used).

Since the JIT can't represent a CDQ instruction in its IR, an arithmetic shift (GT_RSH) has been used instead to extract the dividend sign. xarch's SAR is larger than CDQ but it has the advantage that it doesn't require specific registers. Also, arithmetic shifts are available on architectures other than xarch.

Example: method "static int foo(int x) => x / 8;" is now compiled to
mov      eax, ecx
mov      edx, eax
sar      edx, 31
and      edx, 7
add      edx, eax
mov      eax, edx
sar      eax, 3

instead of
mov      eax, ecx
cdq
and      edx, 7
add      eax, edx
sar      eax, 3

As a side-effect of this change the optimization now also works when the divisor is too large to be contained. Previously this wasn't possible because the divisor constant needed to be modified during codegen but the constant was already loaded into a register.

Example: method "static ulong foo(ulong x) => x / 4294967296;" is now compiled to
mov      rax, rcx
shr      rax, 32

whereas before a DIV instruction was used.

This change also fixes an issue in fgShouldUseMagicNumberDivide. The optimization that is done in lower can handle negative power of 2 divisors, but fgShouldUseMagicNumberDivide claimed those cases for magic number division because it didn't check the absolute value of the divisor.

Example: method "static int foo(int x) => x / -2;" is now compiled to

mov      eax, ecx
mov      edx, eax
shr      edx, 31
add      edx, eax
sar      edx, 1
mov      eax, edx
neg      eax

instead of
mov      eax, 0x7FFFFFFF
imul     edx:eax, ecx
mov      eax, edx
sub      eax, ecx
mov      edx, eax
shr      edx, 31
add      eax, edx
  • Loading branch information
mikedn committed Jun 22, 2016
1 parent 4286b40 commit babfca8
Show file tree
Hide file tree
Showing 8 changed files with 299 additions and 299 deletions.
112 changes: 0 additions & 112 deletions src/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3365,118 +3365,6 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
}
}


// Generate code for division (or mod) by power of two
// or negative powers of two. (meaning -1 * a power of two, not 2^(-1))
// Op2 must be a contained integer constant.
//
// As written, the whole implementation below is compiled out with '#if 0';
// the only active statement is the NYI("genCodeForPow2Div") in the '#else' arm.
//
// Arguments:
//    tree - the GT_DIV / GT_MOD / GT_UDIV / GT_UMOD node; its gtOp1 is the
//           dividend and its gtOp2 is the constant divisor.
//
// NOTE(review): the disabled body names REG_RAX/REG_RDX, which look like xarch
// register names - presumably carried over from the xarch implementation;
// confirm before re-enabling.
void
CodeGen::genCodeForPow2Div(GenTreeOp* tree)
{
#if 0
GenTree *dividend = tree->gtOp.gtOp1;
GenTree *divisor = tree->gtOp.gtOp2;
genTreeOps oper = tree->OperGet();
emitAttr size = emitTypeSize(tree);
emitter *emit = getEmitter();
regNumber targetReg = tree->gtRegNum;
var_types targetType = tree->TypeGet();

// GT_DIV and GT_MOD are the signed forms; GT_UDIV and GT_UMOD take the 'else' path below.
bool isSigned = oper == GT_MOD || oper == GT_DIV;

// precondition: extended dividend is in RDX:RAX
// which means it is either all zeros or all ones
// (i.e. RDX acts as a sign mask: 0 for a non-negative dividend, -1 for a negative one)

noway_assert(divisor->isContained());
GenTreeIntConCommon* divImm = divisor->AsIntConCommon();
int64_t imm = divImm->IconValue();
ssize_t abs_imm = abs(imm);
noway_assert(isPow2(abs_imm));


if (isSigned)
{
// Divisor of exactly 1: the value in RAX is copied to the target unchanged.
// NOTE(review): this path is taken for GT_MOD as well as GT_DIV, so a
// remainder by 1 yields the dividend rather than 0 - confirm whether
// GT_MOD can reach here.
if (imm == 1)
{
if (targetReg != REG_RAX)
inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);

return;
}

// Step 1: bias the dividend (or pre-mask it, for mod by +/-2) using the sign mask in RDX.
if (abs_imm == 2)
{
if (oper == GT_MOD)
{
emit->emitIns_R_I(INS_and, size, REG_RAX, 1); // result is 0 or 1
// xor with rdx will flip all bits if negative
emit->emitIns_R_R(INS_xor, size, REG_RAX, REG_RDX); // 111.11110 or 0
}
else
{
assert(oper == GT_DIV);
// add 1 if it's negative (RDX is -1 in that case, so subtracting it adds 1)
emit->emitIns_R_R(INS_sub, size, REG_RAX, REG_RDX);
}
}
else
{
// add imm-1 if negative: masking the all-ones RDX leaves abs_imm-1, masking zero leaves 0
emit->emitIns_R_I(INS_and, size, REG_RDX, abs_imm - 1);
emit->emitIns_R_R(INS_add, size, REG_RAX, REG_RDX);
}

// Step 2: produce the quotient or the remainder from the adjusted value in RAX.
if (oper == GT_DIV)
{
// Arithmetic shift right by log2(|divisor|) performs the division.
unsigned shiftAmount = genLog2(unsigned(abs_imm));
inst_RV_SH(INS_sar, size, REG_RAX, shiftAmount);

// A negative divisor flips the sign of the quotient.
if (imm < 0)
{
emit->emitIns_R(INS_neg, size, REG_RAX);
}
}
else
{
assert(oper == GT_MOD);
// For abs_imm == 2 the low bit was already isolated in step 1.
if (abs_imm > 2)
{
emit->emitIns_R_I(INS_and, size, REG_RAX, abs_imm - 1);
}
// RDX contains 'imm-1' if negative (or the untouched all-ones sign mask when abs_imm == 2);
// subtracting it removes the adjustment applied in step 1
emit->emitIns_R_R(INS_sub, size, REG_RAX, REG_RDX);
}

// The signed sequence computes in RAX; copy to the node's register if it differs.
if (targetReg != REG_RAX)
{
inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
}
}
else
{
// Unsigned case: works directly in the target register, RAX/RDX are not used.
assert (imm > 0);

if (targetReg != dividend->gtRegNum)
{
inst_RV_RV(INS_mov, targetReg, dividend->gtRegNum, targetType);
}

if (oper == GT_UDIV)
{
// Unsigned divide by 2^k is a logical shift right by k.
inst_RV_SH(INS_shr, size, targetReg, genLog2(unsigned(imm)));
}
else
{
assert(oper == GT_UMOD);

// Unsigned remainder by 2^k keeps the low k bits.
emit->emitIns_R_I(INS_and, size, targetReg, imm -1);
}
}
#else // !0
NYI("genCodeForPow2Div");
#endif // !0
}


/***********************************************************************************************
* Generate code for localloc
*/
Expand Down
2 changes: 0 additions & 2 deletions src/jit/codegenlinear.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@

void genCodeForMulHi(GenTreeOp* treeNode);

void genCodeForPow2Div(GenTreeOp* treeNode);

void genLeaInstruction(GenTreeAddrMode *lea);

void genSetRegToCond(regNumber dstReg, GenTreePtr tree);
Expand Down
162 changes: 18 additions & 144 deletions src/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1280,42 +1280,30 @@ void CodeGen::genCodeForDivMod(GenTreeOp* treeNode)
gcInfo.gcMarkRegSetNpt(RBM_RDX);
}

if (divisor->isContainedIntOrIImmed())
{
GenTreeIntConCommon* divImm = divisor->AsIntConCommon();
assert(divImm->IsIntCnsFitsInI32());
ssize_t imm = divImm->IconValue();
assert(isPow2(abs(imm)));
genCodeForPow2Div(treeNode->AsOp());
}
// Perform the 'targetType' (64-bit or 32-bit) divide instruction
instruction ins;
if (oper == GT_UMOD || oper == GT_UDIV)
ins = INS_div;
else
{
// Perform the 'targetType' (64-bit or 32-bit) divide instruction
instruction ins;
if (oper == GT_UMOD || oper == GT_UDIV)
ins = INS_div;
else
ins = INS_idiv;
ins = INS_idiv;

emit->emitInsBinary(ins, size, treeNode, divisor);
emit->emitInsBinary(ins, size, treeNode, divisor);

// Signed divide RDX:RAX by r/m64, with result
// stored in RAX := Quotient, RDX := Remainder.
// Move the result to the desired register, if necessary
if (oper == GT_DIV || oper == GT_UDIV)
// DIV/IDIV instructions always store the quotient in RAX and the remainder in RDX.
// Move the result to the desired register, if necessary
if (oper == GT_DIV || oper == GT_UDIV)
{
if (targetReg != REG_RAX)
{
if (targetReg != REG_RAX)
{
inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
}
inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
}
else
}
else
{
assert((oper == GT_MOD) || (oper == GT_UMOD));
if (targetReg != REG_RDX)
{
assert((oper == GT_MOD) || (oper == GT_UMOD));
if (targetReg != REG_RDX)
{
inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType);
}
inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType);
}
}
}
Expand Down Expand Up @@ -2888,120 +2876,6 @@ CodeGen::genMultiRegCallStoreToLocal(GenTreePtr treeNode)
#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
}

// Emit the instruction sequence for an integer division or remainder whose
// divisor is a constant power of two, or the negation of one (that is,
// -1 * 2^k, not 2^(-k)).
//
// Arguments:
//    tree - the GT_DIV / GT_MOD / GT_UDIV / GT_UMOD node. Its second operand
//           must be a contained integer constant.
//
// For the signed operators the sign-extended dividend is expected in RDX:RAX
// on entry, so RDX is either all zeros or all ones.
void
CodeGen::genCodeForPow2Div(GenTreeOp* tree)
{
    GenTree* const   dividendNode = tree->gtOp.gtOp1;
    GenTree* const   divisorNode  = tree->gtOp.gtOp2;
    const genTreeOps oper         = tree->OperGet();
    const emitAttr   opSize       = emitTypeSize(tree);
    emitter* const   emit         = getEmitter();
    const regNumber  dstReg       = tree->gtRegNum;
    const var_types  dstType      = tree->TypeGet();

    const bool signedOper = (oper == GT_DIV) || (oper == GT_MOD);

    noway_assert(divisorNode->isContained());
    GenTreeIntConCommon* const divisorConst = divisorNode->AsIntConCommon();
    const ssize_t imm     = divisorConst->IconValue();
    const ssize_t abs_imm = abs(imm);
    noway_assert(isPow2(abs_imm));

    // Unsigned operators: a single shift or mask applied to the target register.
    if (!signedOper)
    {
        assert (imm > 0);

        const regNumber srcReg = dividendNode->gtRegNum;
        if (dstReg != srcReg)
        {
            inst_RV_RV(INS_mov, dstReg, srcReg, dstType);
        }

        if (oper != GT_UDIV)
        {
            assert(oper == GT_UMOD);

            // Remainder by 2^k keeps the low k bits.
            emit->emitIns_R_I(INS_and, opSize, dstReg, imm - 1);
        }
        else
        {
            // Quotient by 2^k is a logical right shift by k.
            inst_RV_SH(INS_shr, opSize, dstReg, genLog2(unsigned(imm)));
        }
        return;
    }

    // Signed, divisor == 1: the quotient is the dividend, the remainder is zero.
    if (imm == 1)
    {
        if (oper != GT_DIV)
        {
            assert(oper == GT_MOD);
            instGen_Set_Reg_To_Zero(opSize, dstReg);
        }
        else if (dstReg != REG_RAX)
        {
            inst_RV_RV(INS_mov, dstReg, REG_RAX, dstType);
        }
        return;
    }

    const bool divisorIsTwo = (abs_imm == 2);

    if (oper == GT_DIV)
    {
        if (divisorIsTwo)
        {
            // RDX is -1 for a negative dividend, so subtracting it adds 1.
            emit->emitIns_R_R(INS_sub, opSize, REG_RAX, REG_RDX);
        }
        else
        {
            // Turn the sign mask into (abs_imm - 1) or 0 and add it as a bias.
            emit->emitIns_R_I(INS_and, opSize, REG_RDX, abs_imm - 1);
            emit->emitIns_R_R(INS_add, opSize, REG_RAX, REG_RDX);
        }

        // Arithmetic shift by log2(|divisor|) yields the quotient magnitude.
        const unsigned shiftBy = genLog2(unsigned(abs_imm));
        inst_RV_SH(INS_sar, opSize, REG_RAX, shiftBy);

        // A negative divisor flips the sign of the quotient.
        if (imm < 0)
        {
            emit->emitIns_R(INS_neg, opSize, REG_RAX);
        }
    }
    else
    {
        assert(oper == GT_MOD);

        if (divisorIsTwo)
        {
            // Isolate the low bit (0 or 1), then xor with the sign mask so
            // that a negative dividend has every bit flipped.
            emit->emitIns_R_I(INS_and, opSize, REG_RAX, 1);
            emit->emitIns_R_R(INS_xor, opSize, REG_RAX, REG_RDX);
        }
        else
        {
            // Turn the sign mask into (abs_imm - 1) or 0 and add it as a bias.
            emit->emitIns_R_I(INS_and, opSize, REG_RDX, abs_imm - 1);
            emit->emitIns_R_R(INS_add, opSize, REG_RAX, REG_RDX);
        }

        if (abs_imm > 2)
        {
            emit->emitIns_R_I(INS_and, opSize, REG_RAX, abs_imm - 1);
        }

        // Undo the adjustment: RDX holds the bias (or, for a divisor of
        // +/-2, the untouched sign mask).
        emit->emitIns_R_R(INS_sub, opSize, REG_RAX, REG_RDX);
    }

    // The signed sequences compute in RAX; move the result if the node was
    // assigned a different register.
    if (dstReg != REG_RAX)
    {
        inst_RV_RV(INS_mov, dstReg, REG_RAX, dstType);
    }
}


/***********************************************************************************************
* Generate code for localloc
Expand Down
Loading

0 comments on commit babfca8

Please sign in to comment.