-
Notifications
You must be signed in to change notification settings - Fork 12.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[InstCombine] Extend foldICmpBinOp
to add
-like or
.
#71396
Conversation
✅ With the latest revision this PR passed the C/C++ code formatter. |
f4346fc
to
c73a702
Compare
@@ -4564,6 +4564,17 @@ static Instruction *foldICmpXorXX(ICmpInst &I, const SimplifyQuery &Q, | |||
return nullptr; | |||
} | |||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have seen checks for or
which is actually an add
in other places. Should we make this into a centralized utility function?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Great, looks like this comes up in other places too: #72583
I'll wait for that PR to land, so I can apply it here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also see similar PR: #75899
3133fd3
to
d52843a
Compare
@llvm/pr-subscribers-llvm-transforms Author: Mikhail Gudim (mgudim) ChangesInstCombine canonicalizes In this patch we generalize Full diff: https://github.com/llvm/llvm-project/pull/71396.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 289976718e52f3..0b99260bb635a9 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -4624,31 +4624,37 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
}
bool NoOp0WrapProblem = false, NoOp1WrapProblem = false;
- if (BO0 && isa<OverflowingBinaryOperator>(BO0))
- NoOp0WrapProblem =
- ICmpInst::isEquality(Pred) ||
- (CmpInst::isUnsigned(Pred) && BO0->hasNoUnsignedWrap()) ||
- (CmpInst::isSigned(Pred) && BO0->hasNoSignedWrap());
- if (BO1 && isa<OverflowingBinaryOperator>(BO1))
- NoOp1WrapProblem =
- ICmpInst::isEquality(Pred) ||
- (CmpInst::isUnsigned(Pred) && BO1->hasNoUnsignedWrap()) ||
- (CmpInst::isSigned(Pred) && BO1->hasNoSignedWrap());
-
+ bool Op0HasNUW = false, Op1HasNUW = false;
+ bool Op0HasNSW = false, Op1HasNSW = false;
// Analyze the case when either Op0 or Op1 is an add instruction.
// Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null).
Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
- if (BO0 && BO0->getOpcode() == Instruction::Add) {
- A = BO0->getOperand(0);
- B = BO0->getOperand(1);
+ auto hasNoWrapProblem = [](const BinaryOperator &BO, CmpInst::Predicate Pred,
+ bool &HasNSW, bool &HasNUW) -> bool {
+ if (isa<OverflowingBinaryOperator>(BO)) {
+ HasNUW = BO.hasNoUnsignedWrap();
+ HasNSW = BO.hasNoSignedWrap();
+ return ICmpInst::isEquality(Pred) ||
+ (CmpInst::isUnsigned(Pred) && HasNUW) ||
+ (CmpInst::isSigned(Pred) && HasNSW);
+ } else if (BO.getOpcode() == Instruction::Or) {
+ HasNUW = true;
+ HasNSW = true;
+ return true;
+ } else {
+ return false;
+ }
+ };
+
+ if (BO0) {
+ match(BO0, m_AddLike(m_Value(A), m_Value(B)));
+ NoOp0WrapProblem = hasNoWrapProblem(*BO0, Pred, Op0HasNSW, Op0HasNUW);
}
- if (BO1 && BO1->getOpcode() == Instruction::Add) {
- C = BO1->getOperand(0);
- D = BO1->getOperand(1);
+ if (BO1) {
+ match(BO1, m_AddLike(m_Value(C), m_Value(D)));
+ NoOp1WrapProblem = hasNoWrapProblem(*BO1, Pred, Op1HasNSW, Op1HasNUW);
}
- // icmp (A+B), A -> icmp B, 0 for equalities or if there is no overflow.
- // icmp (A+B), B -> icmp A, 0 for equalities or if there is no overflow.
if ((A == Op1 || B == Op1) && NoOp0WrapProblem)
return new ICmpInst(Pred, A == Op1 ? B : A,
Constant::getNullValue(Op1->getType()));
@@ -4764,17 +4770,15 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
APInt AP2Abs = AP2->abs();
if (AP1Abs.uge(AP2Abs)) {
APInt Diff = *AP1 - *AP2;
- bool HasNUW = BO0->hasNoUnsignedWrap() && Diff.ule(*AP1);
- bool HasNSW = BO0->hasNoSignedWrap();
Constant *C3 = Constant::getIntegerValue(BO0->getType(), Diff);
- Value *NewAdd = Builder.CreateAdd(A, C3, "", HasNUW, HasNSW);
+ Value *NewAdd = Builder.CreateAdd(
+ A, C3, "", Op0HasNUW && Diff.ule(*AP1), Op0HasNSW);
return new ICmpInst(Pred, NewAdd, C);
} else {
APInt Diff = *AP2 - *AP1;
- bool HasNUW = BO1->hasNoUnsignedWrap() && Diff.ule(*AP2);
- bool HasNSW = BO1->hasNoSignedWrap();
Constant *C3 = Constant::getIntegerValue(BO0->getType(), Diff);
- Value *NewAdd = Builder.CreateAdd(C, C3, "", HasNUW, HasNSW);
+ Value *NewAdd = Builder.CreateAdd(
+ C, C3, "", Op1HasNUW && Diff.ule(*AP1), Op1HasNSW);
return new ICmpInst(Pred, A, NewAdd);
}
}
@@ -4868,16 +4872,14 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
isKnownNonZero(Z, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT);
// if Z != 0 and nsw(X * Z) and nsw(Y * Z)
// X * Z eq/ne Y * Z -> X eq/ne Y
- if (NonZero && BO0 && BO1 && BO0->hasNoSignedWrap() &&
- BO1->hasNoSignedWrap())
+ if (NonZero && BO0 && BO1 && Op0HasNSW && Op1HasNSW)
return new ICmpInst(Pred, X, Y);
} else
NonZero = isKnownNonZero(Z, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT);
// If Z != 0 and nuw(X * Z) and nuw(Y * Z)
// X * Z u{lt/le/gt/ge}/eq/ne Y * Z -> X u{lt/le/gt/ge}/eq/ne Y
- if (NonZero && BO0 && BO1 && BO0->hasNoUnsignedWrap() &&
- BO1->hasNoUnsignedWrap())
+ if (NonZero && BO0 && BO1 && Op0HasNUW && Op1HasNUW)
return new ICmpInst(Pred, X, Y);
}
}
@@ -4976,8 +4978,8 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
case Instruction::Shl: {
- bool NUW = BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap();
- bool NSW = BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap();
+ bool NUW = Op0HasNUW && Op1HasNUW;
+ bool NSW = Op0HasNSW && Op1HasNSW;
if (!NUW && !NSW)
break;
if (!NSW && I.isSigned())
diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll
index 1c7bb36f0d34c0..cc1bcffa136066 100644
--- a/llvm/test/Transforms/InstCombine/icmp.ll
+++ b/llvm/test/Transforms/InstCombine/icmp.ll
@@ -3862,10 +3862,9 @@ define <8 x i1> @bitreverse_vec_ne(<8 x i16> %x, <8 x i16> %y) {
define i1 @knownbits1(i8 %a, i8 %b) {
; CHECK-LABEL: @knownbits1(
; CHECK-NEXT: [[A1:%.*]] = and i8 [[A:%.*]], 1
-; CHECK-NEXT: [[A2:%.*]] = or disjoint i8 [[A1]], 4
; CHECK-NEXT: [[B1:%.*]] = and i8 [[B:%.*]], 2
-; CHECK-NEXT: [[B2:%.*]] = or disjoint i8 [[B1]], 5
-; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[A2]], [[B2]]
+; CHECK-NEXT: [[TMP1:%.*]] = or disjoint i8 [[B1]], 1
+; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[A1]], [[TMP1]]
; CHECK-NEXT: ret i1 [[C]]
;
%a1 = and i8 %a, 5
@@ -3879,10 +3878,9 @@ define i1 @knownbits1(i8 %a, i8 %b) {
define i1 @knownbits2(i8 %a, i8 %b) {
; CHECK-LABEL: @knownbits2(
; CHECK-NEXT: [[A1:%.*]] = and i8 [[A:%.*]], 1
-; CHECK-NEXT: [[A2:%.*]] = or disjoint i8 [[A1]], 4
; CHECK-NEXT: [[B1:%.*]] = and i8 [[B:%.*]], 2
-; CHECK-NEXT: [[B2:%.*]] = or disjoint i8 [[B1]], 5
-; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A2]], [[B2]]
+; CHECK-NEXT: [[TMP1:%.*]] = or disjoint i8 [[B1]], 1
+; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A1]], [[TMP1]]
; CHECK-NEXT: ret i1 [[C]]
;
%a1 = and i8 %a, 5
@@ -3896,10 +3894,9 @@ define i1 @knownbits2(i8 %a, i8 %b) {
define i1 @knownbits3(i8 %a, i8 %b) {
; CHECK-LABEL: @knownbits3(
; CHECK-NEXT: [[A1:%.*]] = and i8 [[A:%.*]], 1
-; CHECK-NEXT: [[A2:%.*]] = or disjoint i8 [[A1]], 4
; CHECK-NEXT: [[B1:%.*]] = and i8 [[B:%.*]], 2
-; CHECK-NEXT: [[B2:%.*]] = or disjoint i8 [[B1]], 5
-; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[B2]], [[A2]]
+; CHECK-NEXT: [[TMP1:%.*]] = or disjoint i8 [[B1]], 1
+; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[TMP1]], [[A1]]
; CHECK-NEXT: ret i1 [[C]]
;
%a1 = and i8 %a, 5
@@ -3913,10 +3910,9 @@ define i1 @knownbits3(i8 %a, i8 %b) {
define <2 x i1> @knownbits4(<2 x i8> %a, <2 x i8> %b) {
; CHECK-LABEL: @knownbits4(
; CHECK-NEXT: [[A1:%.*]] = and <2 x i8> [[A:%.*]], <i8 1, i8 1>
-; CHECK-NEXT: [[A2:%.*]] = or disjoint <2 x i8> [[A1]], <i8 4, i8 4>
; CHECK-NEXT: [[B1:%.*]] = and <2 x i8> [[B:%.*]], <i8 2, i8 2>
-; CHECK-NEXT: [[B2:%.*]] = or disjoint <2 x i8> [[B1]], <i8 5, i8 5>
-; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i8> [[B2]], [[A2]]
+; CHECK-NEXT: [[TMP1:%.*]] = or disjoint <2 x i8> [[B1]], <i8 1, i8 1>
+; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i8> [[TMP1]], [[A1]]
; CHECK-NEXT: ret <2 x i1> [[C]]
;
%a1 = and <2 x i8> %a, <i8 5, i8 5>
@@ -3932,10 +3928,9 @@ define <2 x i1> @knownbits4(<2 x i8> %a, <2 x i8> %b) {
define i1 @knownbits5(i8 %a, i8 %b) {
; CHECK-LABEL: @knownbits5(
; CHECK-NEXT: [[A1:%.*]] = and i8 [[A:%.*]], -127
-; CHECK-NEXT: [[A2:%.*]] = or disjoint i8 [[A1]], 4
; CHECK-NEXT: [[B1:%.*]] = and i8 [[B:%.*]], 2
-; CHECK-NEXT: [[B2:%.*]] = or disjoint i8 [[B1]], 5
-; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[A2]], [[B2]]
+; CHECK-NEXT: [[TMP1:%.*]] = or disjoint i8 [[B1]], 1
+; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[A1]], [[TMP1]]
; CHECK-NEXT: ret i1 [[C]]
;
%a1 = and i8 %a, 133
@@ -3949,10 +3944,9 @@ define i1 @knownbits5(i8 %a, i8 %b) {
define i1 @knownbits6(i8 %a, i8 %b) {
; CHECK-LABEL: @knownbits6(
; CHECK-NEXT: [[A1:%.*]] = and i8 [[A:%.*]], -127
-; CHECK-NEXT: [[A2:%.*]] = or disjoint i8 [[A1]], 4
; CHECK-NEXT: [[B1:%.*]] = and i8 [[B:%.*]], 2
-; CHECK-NEXT: [[B2:%.*]] = or disjoint i8 [[B1]], 5
-; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A2]], [[B2]]
+; CHECK-NEXT: [[TMP1:%.*]] = or disjoint i8 [[B1]], 1
+; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A1]], [[TMP1]]
; CHECK-NEXT: ret i1 [[C]]
;
%a1 = and i8 %a, 133
@@ -3966,10 +3960,9 @@ define i1 @knownbits6(i8 %a, i8 %b) {
define <2 x i1> @knownbits7(<2 x i8> %a, <2 x i8> %b) {
; CHECK-LABEL: @knownbits7(
; CHECK-NEXT: [[A1:%.*]] = and <2 x i8> [[A:%.*]], <i8 -127, i8 -127>
-; CHECK-NEXT: [[A2:%.*]] = or disjoint <2 x i8> [[A1]], <i8 4, i8 4>
; CHECK-NEXT: [[B1:%.*]] = and <2 x i8> [[B:%.*]], <i8 2, i8 2>
-; CHECK-NEXT: [[B2:%.*]] = or disjoint <2 x i8> [[B1]], <i8 5, i8 5>
-; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i8> [[B2]], [[A2]]
+; CHECK-NEXT: [[TMP1:%.*]] = or disjoint <2 x i8> [[B1]], <i8 1, i8 1>
+; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i8> [[TMP1]], [[A1]]
; CHECK-NEXT: ret <2 x i1> [[C]]
;
%a1 = and <2 x i8> %a, <i8 133, i8 133>
@@ -3983,10 +3976,9 @@ define <2 x i1> @knownbits7(<2 x i8> %a, <2 x i8> %b) {
define i1 @knownbits8(i8 %a, i8 %b) {
; CHECK-LABEL: @knownbits8(
; CHECK-NEXT: [[A1:%.*]] = and i8 [[A:%.*]], -127
-; CHECK-NEXT: [[A2:%.*]] = or disjoint i8 [[A1]], 4
; CHECK-NEXT: [[B1:%.*]] = and i8 [[B:%.*]], 2
-; CHECK-NEXT: [[B2:%.*]] = or disjoint i8 [[B1]], 5
-; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[B2]], [[A2]]
+; CHECK-NEXT: [[TMP1:%.*]] = or disjoint i8 [[B1]], 1
+; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[TMP1]], [[A1]]
; CHECK-NEXT: ret i1 [[C]]
;
%a1 = and i8 %a, 133
|
7eaae44
to
275f781
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
(CmpInst::isSigned(Pred) && BO1->hasNoSignedWrap()); | ||
|
||
bool Op0HasNUW = false, Op1HasNUW = false; | ||
bool Op0HasNSW = false, Op1HasNSW = false; | ||
// Analyze the case when either Op0 or Op1 is an add instruction. | ||
// Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null). |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This comment should be below the lambda (above the Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
line).
InstCombine canonicalizes `add` to `or` when possible, but this makes some optimizations applicable to `add` to be missed because they don't realize that the `or` is equivalent to `add`. In this patch we generalize `foldICmpBinOp` to handle such cases.
275f781
to
4dee840
Compare
This patch tries to fold minmax intrinsic by using `computeConstantRangeIncludingKnownBits`. Fixes regression in [_karatsuba_rec:cpython/Modules/_decimal/libmpdec/mpdecimal.c](https://github.com/python/cpython/blob/c31943af16f885c8cf5d5a690c25c366afdb2862/Modules/_decimal/libmpdec/mpdecimal.c#L5460-L5462), which was introduced by #71396. See also dtcxzyw/llvm-opt-benchmark#16 (comment). Alive2 for splat vectors with undef: https://alive2.llvm.org/ce/z/J8hKWd
InstCombine canonicalizes
add
toor
when possible, but this makes some optimizations applicable toadd
to be missed because they don't realize that theor
is equivalent toadd
.In this patch we generalize
foldICmpBinOp
to handle such cases.