From bd32598c9f44d3b12ec9434cc13042baeba542bf Mon Sep 17 00:00:00 2001 From: Will Smith Date: Fri, 21 Apr 2023 09:49:50 -0700 Subject: [PATCH] [JIT] ARM64 - Combine 'neg' and 'cmp' to 'cmn' (#84667) * Combine compare and shift ops into a single compare op * Fix comment * Expanding IsContainableBinaryOp. * Remove commented code * Added more cases * Initial work * Emitting cmn * Handle shift-version of neg and cmn * Add optimization check * Check for set flags * Added extra asserts * Renamed IsContainableBinaryOp to IsContainableUnaryOrBinaryOp. Fixed an assert. * Remove optimizing code from TryLowerConditionToFlagsNode. Added additional check in containing NEG for a CMP or compare op. --- src/coreclr/jit/codegenarm64.cpp | 92 +++++++++++++++++++++++++++----- src/coreclr/jit/lower.cpp | 6 ++- src/coreclr/jit/lower.h | 2 +- src/coreclr/jit/lowerarmarch.cpp | 66 +++++++++++++++++++---- 4 files changed, 141 insertions(+), 25 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 15e47ad6c346f6..030ca9c17d36c0 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -3369,13 +3369,40 @@ void CodeGen::genCodeForNegNot(GenTree* tree) // The src must be a register. if (tree->OperIs(GT_NEG) && operand->isContained()) { - ins = INS_mneg; - GenTree* op1 = tree->gtGetOp1(); - GenTree* a = op1->gtGetOp1(); - GenTree* b = op1->gtGetOp2(); - genConsumeRegs(op1); - assert(op1->OperGet() == GT_MUL); - GetEmitter()->emitIns_R_R_R(ins, emitActualTypeSize(tree), targetReg, a->GetRegNum(), b->GetRegNum()); + genTreeOps oper = operand->OperGet(); + switch (oper) + { + case GT_MUL: + { + ins = INS_mneg; + GenTree* op1 = tree->gtGetOp1(); + GenTree* a = op1->gtGetOp1(); + GenTree* b = op1->gtGetOp2(); + genConsumeRegs(op1); + GetEmitter()->emitIns_R_R_R(ins, emitActualTypeSize(tree), targetReg, a->GetRegNum(), b->GetRegNum()); + } + break; + + case GT_LSH: + case GT_RSH: + case GT_RSZ: + { + assert(ins == INS_neg || ins == INS_negs); + assert(operand->gtGetOp2()->IsCnsIntOrI()); + assert(operand->gtGetOp2()->isContained()); + + GenTree* op1 = tree->gtGetOp1(); + GenTree* a = op1->gtGetOp1(); + GenTree* b = op1->gtGetOp2(); + genConsumeRegs(op1); + GetEmitter()->emitIns_R_R_I(ins, emitActualTypeSize(tree), targetReg, a->GetRegNum(), + b->AsIntConCommon()->IntegralValue(), ShiftOpToInsOpts(oper)); + } + break; + + default: + unreached(); + } } else { @@ -4534,12 +4561,53 @@ void CodeGen::genCodeForCompare(GenTreeOp* tree) } else if (op2->isContained()) { - assert(op2->OperIs(GT_LSH, GT_RSH, GT_RSZ)); - assert(op2->gtGetOp2()->IsCnsIntOrI()); - assert(op2->gtGetOp2()->isContained()); + genTreeOps oper = op2->OperGet(); + switch (oper) + { + case GT_NEG: + assert(ins == INS_cmp); + + ins = INS_cmn; + oper = op2->gtGetOp1()->OperGet(); + switch (oper) + { + case GT_LSH: + case GT_RSH: + case GT_RSZ: + { + GenTree* shiftOp1 = op2->gtGetOp1()->gtGetOp1(); + GenTree* shiftOp2 = op2->gtGetOp1()->gtGetOp2(); + + assert(op2->gtGetOp1()->isContained()); + assert(shiftOp2->IsCnsIntOrI()); + assert(shiftOp2->isContained()); + + emit->emitIns_R_R_I(ins, cmpSize, op1->GetRegNum(), shiftOp1->GetRegNum(), + shiftOp2->AsIntConCommon()->IntegralValue(), ShiftOpToInsOpts(oper)); + } + break; + + default: + assert(!op2->gtGetOp1()->isContained()); + + emit->emitIns_R_R(ins, cmpSize, op1->GetRegNum(), op2->gtGetOp1()->GetRegNum()); + break; + } + break; + + case GT_LSH: + case GT_RSH: + case GT_RSZ: + assert(op2->gtGetOp2()->IsCnsIntOrI()); + assert(op2->gtGetOp2()->isContained()); + + emit->emitIns_R_R_I(ins, cmpSize, op1->GetRegNum(), op2->gtGetOp1()->GetRegNum(), + op2->gtGetOp2()->AsIntConCommon()->IntegralValue(), ShiftOpToInsOpts(oper)); + break; - emit->emitIns_R_R_I(ins, cmpSize, op1->GetRegNum(), op2->gtGetOp1()->GetRegNum(), - op2->gtGetOp2()->AsIntConCommon()->IntegralValue(), ShiftOpToInsOpts(op2->gtOper)); + default: + unreached(); + } } else { diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 2d0a2fcef1d821..466787e02184b3 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -3775,7 +3775,7 @@ GenTree* Lowering::LowerJTrue(GenTreeOp* jtrue) jtrue->AsCC()->gtCondition = condCode; } - JITDUMP("Result:\n"); + JITDUMP("Lowering JTRUE Result:\n"); DISPTREERANGE(BlockRange(), jtrue); JITDUMP("\n"); @@ -3876,6 +3876,10 @@ GenTree* Lowering::LowerSelect(GenTreeConditional* select) // bool Lowering::TryLowerConditionToFlagsNode(GenTree* parent, GenTree* condition, GenCondition* cond) { + JITDUMP("Lowering condition:\n"); + DISPTREERANGE(BlockRange(), condition); + JITDUMP("\n"); + if (condition->OperIsCompare()) { if (!IsInvariantInRange(condition, parent)) diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index 16677ef3304e92..d36376bf769489 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -475,7 +475,7 @@ class Lowering final : public Phase } #ifdef TARGET_ARM64 - bool IsContainableBinaryOp(GenTree* parentNode, GenTree* childNode) const; + bool IsContainableUnaryOrBinaryOp(GenTree* parentNode, GenTree* childNode) const; #endif // TARGET_ARM64 #if defined(FEATURE_HW_INTRINSICS) diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 7b710aab8eecad..afbddcf04b06d5 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -148,7 +148,7 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const #ifdef TARGET_ARM64 //------------------------------------------------------------------------ -// IsContainableBinaryOp: Is the child node a binary op that is containable from the parent node? +// IsContainableUnaryOrBinaryOp: Is the child node a unary/binary op that is containable from the parent node? // // Return Value: // True if the child node can be contained. @@ -156,10 +156,20 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const // Notes: // This can handle the decision to emit 'madd' or 'msub'. // -bool Lowering::IsContainableBinaryOp(GenTree* parentNode, GenTree* childNode) const +bool Lowering::IsContainableUnaryOrBinaryOp(GenTree* parentNode, GenTree* childNode) const { +#ifdef DEBUG // The node we're checking should be one of the two child nodes - assert((parentNode->gtGetOp1() == childNode) || (parentNode->gtGetOp2() == childNode)); + if (parentNode->OperIsBinary()) + { + assert((parentNode->gtGetOp1() == childNode) || (parentNode->gtGetOp2() == childNode)); + } + else + { + assert(parentNode->OperIsUnary()); + assert((parentNode->gtGetOp1() == childNode)); + } +#endif // DEBUG // We cannot contain if the parent node // * is contained @@ -173,7 +183,7 @@ bool Lowering::IsContainableBinaryOp(GenTree* parentNode, GenTree* childNode) co if (!varTypeIsIntegral(parentNode)) return false; - if (parentNode->gtGetOp1()->isContained() || parentNode->gtGetOp2()->isContained()) + if (parentNode->gtGetOp1()->isContained() || (parentNode->OperIsBinary() && parentNode->gtGetOp2()->isContained())) return false; if (parentNode->OperMayOverflow() && parentNode->gtOverflow()) @@ -252,7 +262,7 @@ bool Lowering::IsContainableBinaryOp(GenTree* parentNode, GenTree* childNode) co return false; } - if (parentNode->OperIs(GT_ADD, GT_SUB, GT_AND)) + if (parentNode->OperIs(GT_ADD, GT_SUB, GT_AND, GT_NEG)) { // These operations can still report flags @@ -282,6 +292,32 @@ bool Lowering::IsContainableBinaryOp(GenTree* parentNode, GenTree* childNode) co return false; } + if (childNode->OperIs(GT_NEG)) + { + // If we have a contained LSH, RSH or RSZ, we can still contain NEG if the parent is a CMP or comparison op. + if (childNode->gtGetOp1()->isContained() && !childNode->gtGetOp1()->OperIs(GT_LSH, GT_RSH, GT_RSZ)) + { + // Cannot contain if the childs op1 is already contained + return false; + } + + if ((parentNode->gtFlags & GTF_SET_FLAGS) != 0) + { + // Cannot contain if the parent operation needs to set flags + return false; + } + + if (parentNode->OperIs(GT_CMP) || parentNode->OperIsCompare()) + { + if (IsInvariantInRange(childNode, parentNode)) + { + return true; + } + } + + return false; + } + if (childNode->OperIs(GT_CAST)) { // Find "a op cast(b)" @@ -2027,7 +2063,7 @@ void Lowering::ContainCheckBinary(GenTreeOp* node) #ifdef TARGET_ARM64 if (comp->opts.OptimizationEnabled()) { - if (IsContainableBinaryOp(node, op2)) + if (IsContainableUnaryOrBinaryOp(node, op2)) { if (op2->OperIs(GT_CAST)) { @@ -2039,7 +2075,7 @@ void Lowering::ContainCheckBinary(GenTreeOp* node) return; } - if (node->OperIsCommutative() && IsContainableBinaryOp(node, op1)) + if (node->OperIsCommutative() && IsContainableUnaryOrBinaryOp(node, op1)) { if (op1->OperIs(GT_CAST)) { @@ -2259,19 +2295,22 @@ void Lowering::ContainCheckCompare(GenTreeOp* cmp) } #ifdef TARGET_ARM64 - if (comp->opts.OptimizationEnabled() && cmp->OperIsCompare()) + if (comp->opts.OptimizationEnabled() && (cmp->OperIsCompare() || cmp->OperIs(GT_CMP))) { - if (IsContainableBinaryOp(cmp, op2)) + if (IsContainableUnaryOrBinaryOp(cmp, op2)) { MakeSrcContained(cmp, op2); return; } - if (IsContainableBinaryOp(cmp, op1)) + if (IsContainableUnaryOrBinaryOp(cmp, op1)) { MakeSrcContained(cmp, op1); std::swap(cmp->gtOp1, cmp->gtOp2); - cmp->SetOper(cmp->SwapRelop(cmp->gtOper)); + if (cmp->OperIsCompare()) + { + cmp->SetOper(cmp->SwapRelop(cmp->gtOper)); + } return; } } @@ -2501,6 +2540,11 @@ void Lowering::ContainCheckNeg(GenTreeOp* neg) MakeSrcContained(neg, childNode); } } + else if (comp->opts.OptimizationEnabled() && childNode->OperIs(GT_LSH, GT_RSH, GT_RSZ) && + IsContainableUnaryOrBinaryOp(neg, childNode)) + { + MakeSrcContained(neg, childNode); + } } //----------------------------------------------------------------------------------------------