Optimize integer div/mod by const power of 2 in lowering

Optimizing GT_DIV/GT_UDIV/GT_MOD/GT_UMOD by power of 2 in codegen is problematic because the xarch DIV instruction has special register requirements. By the time codegen decides to perform the optimization, the rax and rdx registers have already been allocated by LSRA even though they're not always needed (as happens in the case of unsigned division, where CDQ isn't used).

Since the JIT can't represent a CDQ instruction in its IR, an arithmetic shift (GT_RSH) is used instead to extract the dividend sign. xarch's SAR is larger than CDQ but it has the advantage that it doesn't require specific registers. Also, arithmetic shifts are available on architectures other than xarch.

Example: method "static int foo(int x) => x / 8;" is now compiled to

    mov eax, ecx
    mov edx, eax
    sar edx, 31
    and edx, 7
    add edx, eax
    mov eax, edx
    sar eax, 3

instead of

    mov eax, ecx
    cdq
    and edx, 7
    add eax, edx
    sar eax, 3

As a side-effect of this change the optimization now also works when the divisor is too large to be contained. Previously this wasn't possible because the divisor constant needed to be modified during codegen but the constant was already loaded into a register.

Example: method "static ulong foo(ulong x) => x / 4294967296;" is now compiled to

    mov rax, rcx
    shr rax, 32

whereas before a DIV instruction was used.

This change also fixes an issue in fgShouldUseMagicNumberDivide. The optimization that is done in lower can handle negative power of 2 divisors, but fgShouldUseMagicNumberDivide still claimed those cases for magic number division because it didn't check the absolute value of the divisor.

Example: method "static int foo(int x) => x / -2;" is now compiled to

    mov eax, ecx
    mov edx, eax
    shr edx, 31
    add edx, eax
    sar edx, 1
    mov eax, edx
    neg eax

instead of

    mov eax, 0x7FFFFFFF
    imul edx:eax, ecx
    mov eax, edx
    sub eax, ecx
    mov edx, eax
    shr edx, 31
    add eax, edx
dotnet · Jun 22, 2016 · babfca8 · babfca8
1 parent 4286b40
commit babfca8
Show file tree

Hide file tree

Showing 8 changed files with 299 additions and 299 deletions.
diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp
@@ -3365,118 +3365,6 @@ CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
     }
 }
 
-
-// Generate code for division (or mod) by power of two
-// or negative powers of two.  (meaning -1 * a power of two, not 2^(-1))
-// Op2 must be a contained integer constant.
-void
-CodeGen::genCodeForPow2Div(GenTreeOp* tree)
-{
-#if 0
-    GenTree *dividend = tree->gtOp.gtOp1;
-    GenTree *divisor  = tree->gtOp.gtOp2;
-    genTreeOps  oper  = tree->OperGet();
-    emitAttr    size  = emitTypeSize(tree);
-    emitter    *emit  = getEmitter();
-    regNumber targetReg  = tree->gtRegNum;
-    var_types targetType = tree->TypeGet();
-
-    bool isSigned = oper == GT_MOD || oper == GT_DIV;
-
-    // precondition: extended dividend is in RDX:RAX
-    // which means it is either all zeros or all ones
-
-    noway_assert(divisor->isContained());
-    GenTreeIntConCommon* divImm = divisor->AsIntConCommon();
-    int64_t imm = divImm->IconValue();
-    ssize_t abs_imm = abs(imm);
-    noway_assert(isPow2(abs_imm));
-
-
-    if (isSigned)
-    {
-        if (imm == 1)
-        {
-            if (targetReg != REG_RAX)
-                inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
-
-            return;
-        }
-
-        if (abs_imm == 2)
-        {
-            if (oper == GT_MOD)
-            {
-                emit->emitIns_R_I(INS_and, size, REG_RAX, 1); // result is 0 or 1
-                // xor with rdx will flip all bits if negative
-                emit->emitIns_R_R(INS_xor, size, REG_RAX, REG_RDX); // 111.11110 or 0
-            }
-            else
-            {
-                assert(oper == GT_DIV);
-                // add 1 if it's negative
-                emit->emitIns_R_R(INS_sub, size, REG_RAX, REG_RDX);
-            }
-        }
-        else
-        {
-            // add imm-1 if negative
-            emit->emitIns_R_I(INS_and, size, REG_RDX, abs_imm - 1);
-            emit->emitIns_R_R(INS_add, size, REG_RAX, REG_RDX);
-        }
-
-        if (oper == GT_DIV)
-        {
-            unsigned shiftAmount = genLog2(unsigned(abs_imm));
-            inst_RV_SH(INS_sar, size, REG_RAX, shiftAmount);
-
-            if (imm < 0)
-            {
-                emit->emitIns_R(INS_neg, size, REG_RAX);
-            }
-        }
-        else
-        {
-            assert(oper == GT_MOD);
-            if (abs_imm > 2)
-            {
-                emit->emitIns_R_I(INS_and, size, REG_RAX, abs_imm - 1);
-            }
-            // RDX contains 'imm-1' if negative
-            emit->emitIns_R_R(INS_sub, size, REG_RAX, REG_RDX);
-        }
-
-        if (targetReg != REG_RAX)
-        {
-            inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
-        }
-    }
-    else
-    {
-        assert (imm > 0);
-
-        if (targetReg != dividend->gtRegNum)
-        {
-            inst_RV_RV(INS_mov, targetReg, dividend->gtRegNum, targetType);
-        }
-
-        if (oper == GT_UDIV)
-        {
-            inst_RV_SH(INS_shr, size, targetReg, genLog2(unsigned(imm)));
-        }
-        else 
-        {
-            assert(oper == GT_UMOD);
-
-            emit->emitIns_R_I(INS_and, size, targetReg, imm -1);
-        }
-    }
-#else // !0
-    NYI("genCodeForPow2Div");
-#endif // !0
-}
-
-
 /***********************************************************************************************
  *  Generate code for localloc
  */

diff --git a/src/jit/codegenlinear.h b/src/jit/codegenlinear.h
@@ -20,8 +20,6 @@
 
     void                genCodeForMulHi(GenTreeOp* treeNode);
 
-    void                genCodeForPow2Div(GenTreeOp* treeNode);
-
     void                genLeaInstruction(GenTreeAddrMode *lea);
 
     void                genSetRegToCond(regNumber dstReg, GenTreePtr tree);

diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
@@ -1280,42 +1280,30 @@ void CodeGen::genCodeForDivMod(GenTreeOp* treeNode)
             gcInfo.gcMarkRegSetNpt(RBM_RDX);
         }
 
-        if (divisor->isContainedIntOrIImmed())
-        {
-            GenTreeIntConCommon* divImm = divisor->AsIntConCommon();
-            assert(divImm->IsIntCnsFitsInI32());
-            ssize_t imm = divImm->IconValue();
-            assert(isPow2(abs(imm)));
-            genCodeForPow2Div(treeNode->AsOp());
-        }
+        // Perform the 'targetType' (64-bit or 32-bit) divide instruction
+        instruction ins;
+        if (oper == GT_UMOD || oper == GT_UDIV)
+            ins = INS_div;
         else
-        {
-            // Perform the 'targetType' (64-bit or 32-bit) divide instruction
-            instruction ins;
-            if (oper == GT_UMOD || oper == GT_UDIV)
-                ins = INS_div;
-            else
-                ins = INS_idiv;
+            ins = INS_idiv;
 
-            emit->emitInsBinary(ins, size, treeNode, divisor);
+        emit->emitInsBinary(ins, size, treeNode, divisor);
 
-            // Signed divide RDX:RAX by r/m64, with result
-            //    stored in RAX := Quotient, RDX := Remainder.
-            // Move the result to the desired register, if necessary
-            if (oper == GT_DIV || oper == GT_UDIV)
+        // DIV/IDIV instructions always store the quotient in RAX and the remainder in RDX.
+        // Move the result to the desired register, if necessary
+        if (oper == GT_DIV || oper == GT_UDIV)
+        {
+            if (targetReg != REG_RAX)
             {
-                if (targetReg != REG_RAX)
-                {
-                    inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
-                }
+                inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
             }
-            else
+        }
+        else
+        {
+            assert((oper == GT_MOD) || (oper == GT_UMOD));
+            if (targetReg != REG_RDX)
             {
-                assert((oper == GT_MOD) || (oper == GT_UMOD));
-                if (targetReg != REG_RDX)
-                {
-                    inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType);
-                }
+                inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType);
             }
         }
     }
@@ -2888,120 +2876,6 @@ CodeGen::genMultiRegCallStoreToLocal(GenTreePtr treeNode)
 #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
 }
 
-// Generate code for division (or mod) by power of two
-// or negative powers of two.  (meaning -1 * a power of two, not 2^(-1))
-// Op2 must be a contained integer constant.
-void
-CodeGen::genCodeForPow2Div(GenTreeOp* tree)
-{
-    GenTree *dividend = tree->gtOp.gtOp1;
-    GenTree *divisor  = tree->gtOp.gtOp2;
-    genTreeOps  oper  = tree->OperGet();
-    emitAttr    size  = emitTypeSize(tree);
-    emitter    *emit  = getEmitter();
-    regNumber targetReg  = tree->gtRegNum;
-    var_types targetType = tree->TypeGet();
-
-    bool isSigned = oper == GT_MOD || oper == GT_DIV;
-
-    // precondition: extended dividend is in RDX:RAX
-    // which means it is either all zeros or all ones
-
-    noway_assert(divisor->isContained());
-    GenTreeIntConCommon* divImm = divisor->AsIntConCommon();
-    ssize_t imm = divImm->IconValue();
-    ssize_t abs_imm = abs(imm);
-    noway_assert(isPow2(abs_imm));
-
-
-    if (isSigned)
-    {
-        if (imm == 1)
-        {
-            if (oper == GT_DIV)
-            {
-                if (targetReg != REG_RAX)
-                    inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
-            }
-            else
-            {
-                assert(oper == GT_MOD);
-                instGen_Set_Reg_To_Zero(size, targetReg);
-            }
-
-            return;
-        }
-
-        if (abs_imm == 2)
-        {
-            if (oper == GT_MOD)
-            {
-                emit->emitIns_R_I(INS_and, size, REG_RAX, 1); // result is 0 or 1
-                // xor with rdx will flip all bits if negative
-                emit->emitIns_R_R(INS_xor, size, REG_RAX, REG_RDX); // 111.11110 or 0
-            }
-            else
-            {
-                assert(oper == GT_DIV);
-                // add 1 if it's negative
-                emit->emitIns_R_R(INS_sub, size, REG_RAX, REG_RDX);
-            }
-        }
-        else
-        {
-            // add imm-1 if negative
-            emit->emitIns_R_I(INS_and, size, REG_RDX, abs_imm - 1);
-            emit->emitIns_R_R(INS_add, size, REG_RAX, REG_RDX);
-        }
-
-        if (oper == GT_DIV)
-        {
-            unsigned shiftAmount = genLog2(unsigned(abs_imm));
-            inst_RV_SH(INS_sar, size, REG_RAX, shiftAmount);
-
-            if (imm < 0)
-            {
-                emit->emitIns_R(INS_neg, size, REG_RAX);
-            }
-        }
-        else
-        {
-            assert(oper == GT_MOD);
-            if (abs_imm > 2)
-            {
-                emit->emitIns_R_I(INS_and, size, REG_RAX, abs_imm - 1);
-            }
-            // RDX contains 'imm-1' if negative
-            emit->emitIns_R_R(INS_sub, size, REG_RAX, REG_RDX);
-        }
-
-        if (targetReg != REG_RAX)
-        {
-            inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
-        }
-    }
-    else
-    {
-        assert (imm > 0);
-
-        if (targetReg != dividend->gtRegNum)
-        {
-            inst_RV_RV(INS_mov, targetReg, dividend->gtRegNum, targetType);
-        }
-
-        if (oper == GT_UDIV)
-        {
-            inst_RV_SH(INS_shr, size, targetReg, genLog2(unsigned(imm)));
-        }
-        else 
-        {
-            assert(oper == GT_UMOD);
-
-            emit->emitIns_R_I(INS_and, size, targetReg, imm -1);
-        }
-    }
-}
-
 
 /***********************************************************************************************
  *  Generate code for localloc