-
Notifications
You must be signed in to change notification settings - Fork 12.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[InstCombine] Fold comparison of adding two z/sext booleans #67895
Conversation
@llvm/pr-subscribers-llvm-transforms Changes
Patch is 24.86 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/67895.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 9f034aba874a8c4..cb828d0a6fc2a37 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -2900,14 +2900,114 @@ Instruction *InstCombinerImpl::foldICmpAddConstant(ICmpInst &Cmp,
BinaryOperator *Add,
const APInt &C) {
Value *Y = Add->getOperand(1);
+ Value *X = Add->getOperand(0);
+
+ Value *Op0 = X, *Op1 = Y;
+ const CmpInst::Predicate Pred = Cmp.getPredicate();
+
+ // We handle all (s/zext i1 Op0 + s/zext i1 Op1 ==/!= 0/1/2) here.
+ // TODO: sext -1, sext -2
+ // sext i1 X + sext i1 Y == -1 --> xor i1 X, Y
+ // https://alive2.llvm.org/ce/z/2nSJ22
+ // sext i1 X + sext i1 Y == -2 --> and i1 X, Y
+ // https://alive2.llvm.org/ce/z/rasQlX
+ if (Cmp.isEquality() &&
+ match(Add, m_c_Add(m_OneUse(m_ZExtOrSExt(m_Value(Op0))),
+ m_OneUse(m_ZExtOrSExt(m_Value(Op1))))) &&
+ Op0->getType()->isIntOrIntVectorTy(1) &&
+ Op1->getType()->isIntOrIntVectorTy(1) &&
+ (C.isZero() || C.isOne() || (C.exactLogBase2() == 1))) {
+ Value *Cond = Builder.getFalse();
+ // Handle zext/zext additions
+ if (match(X, m_OneUse(m_ZExt(m_Value(Op0)))) &&
+ match(Y, m_OneUse(m_ZExt(m_Value(Op1))))) {
+ if (Pred == ICmpInst::ICMP_EQ) {
+ // Case 1: zext i1 Op0 + zext i1 Op1 == 0 --> !(or i1 Op0, Op1)
+ if (C.isZero())
+ Cond = Builder.CreateNot(Builder.CreateOr(Op0, Op1));
+ else if (C.isOne())
+ // Case 2: zext i1 Op0 + zext i1 Op1 == 1 --> xor i1 Op0, Op1
+ Cond = Builder.CreateXor(Op0, Op1);
+ else
+ // Case 3: zext i1 Op0 + zext i1 Op1 == 2 --> and i1 Op0, Op1
+ Cond = Builder.CreateAnd(Op0, Op1);
+ } else {
+ // Case 1: zext i1 Op0 + zext i1 Op1 != 0 --> or i1 Op0, Op1
+ if (C.isZero())
+ Cond = Builder.CreateOr(Op0, Op1);
+ else if (C.isOne())
+ // Case 2: zext i1 Op0 + zext i1 Op1 != 1 --> !(xor i1 Op0, Op1)
+ Cond = Builder.CreateNot(Builder.CreateXor(Op0, Op1));
+ else
+ // Case 3: zext i1 Op0 + zext i1 Op1 != 2 --> !(and i1 Op0, Op1)
+ Cond = Builder.CreateNot(Builder.CreateAnd(Op0, Op1));
+ }
+ return replaceInstUsesWith(Cmp, Cond);
+ }
+
+ // Handles sext i1 Op0 + sext i1 Op1 ==/!= 0/1/2
+ if (match(X, m_OneUse(m_SExt(m_Value(Op0)))) &&
+ match(Y, m_OneUse(m_SExt(m_Value(Op1))))) {
+ if (Pred == ICmpInst::ICMP_EQ) {
+ // Case 1: sext i1 Op0 + sext i1 Op1 == 0 --> !(or i1 Op0, Op1)
+ if (C.isZero())
+ Cond = Builder.CreateNot(Builder.CreateOr(Op0, Op1));
+ // Case 2: sext i1 Op0 + sext i1 Op1 == 1 --> false
+ // Case 3: sext i1 Op0 + sext i1 Op1 == 2 --> false
+ else
+ Cond = Builder.getFalse();
+ } else {
+ // Case 1: sext i1 Op0 + sext i1 Op1 != 0 --> or i1 Op0, Op1
+ if (C.isZero())
+ Cond = Builder.CreateOr(Op0, Op1);
+ else
+ // Case 2: sext i1 Op0 + sext i1 Op1 != 1 --> true
+ // Case 3: sext i1 Op0 + sext i1 Op1 != 2 --> true
+ Cond = Builder.getTrue();
+ }
+ return replaceInstUsesWith(Cmp, Cond);
+ }
+
+ // Addition is commutative, so swap the operands to reuse the sext + zext handling below
+ if (match(X, m_OneUse(m_ZExt(m_OneUse(m_Value(Op0))))) &&
+ match(Y, m_OneUse(m_SExt(m_OneUse(m_Value(Op1)))))) {
+ std::swap(Op0, Op1);
+ std::swap(X, Y);
+ }
+ // Handles sext i1 Op0 + zext i1 Op1 ==/!= 0/1/2
+ if (match(X, m_OneUse(m_SExt(m_Value(Op0)))) &&
+ match(Y, m_OneUse(m_ZExt(m_Value(Op1))))) {
+ if (Pred == ICmpInst::ICMP_EQ) {
+ // Case 1: sext i1 Op0 + zext i1 Op1 == 0 --> !(xor i1 Op0, Op1)
+ if (C.isZero())
+ Cond = Builder.CreateNot(Builder.CreateXor(Op0, Op1));
+ else if (C.isOne())
+ // Case 2: sext i1 Op0 + zext i1 Op1 == 1 --> (!Op0) & Op1
+ Cond = Builder.CreateAnd(Builder.CreateNot(Op0), Op1);
+ else
+ // Case 3: sext i1 Op0 + zext i1 Op1 == 2 --> false
+ Cond = Builder.getFalse();
+ } else {
+ // Case 1: sext i1 Op0 + zext i1 Op1 != 0 --> xor i1 Op0, Op1
+ if (C.isZero())
+ Cond = Builder.CreateXor(Op0, Op1);
+ else if (C.isOne())
+ // Case 2: sext i1 Op0 + zext i1 Op1 != 1 --> Op0 | (!Op1)
+ Cond = Builder.CreateOr(Op0, Builder.CreateNot(Op1));
+ else
+ // Case 3: sext i1 Op0 + zext i1 Op1 != 2 --> true
+ Cond = Builder.getTrue();
+ }
+ return replaceInstUsesWith(Cmp, Cond);
+ }
+ return nullptr;
+ }
const APInt *C2;
if (Cmp.isEquality() || !match(Y, m_APInt(C2)))
return nullptr;
// Fold icmp pred (add X, C2), C.
- Value *X = Add->getOperand(0);
Type *Ty = Add->getType();
- const CmpInst::Predicate Pred = Cmp.getPredicate();
// If the add does not wrap, we can always adjust the compare by subtracting
// the constants. Equality comparisons are handled elsewhere. SGE/SLE/UGE/ULE
diff --git a/llvm/test/Transforms/InstCombine/icmp-add.ll b/llvm/test/Transforms/InstCombine/icmp-add.ll
index a2d0c3eb39d69ec..fae6fb0daec9ed9 100644
--- a/llvm/test/Transforms/InstCombine/icmp-add.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-add.ll
@@ -5,6 +5,756 @@ declare void @use(i32)
; PR1949
+define i1 @cvt_icmp_0_zext_plus_zext_eq(i1 %arg, i1 %arg1) {
+; CHECK-LABEL: @cvt_icmp_0_zext_plus_zext_eq(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[TMP0:%.*]] = or i1 [[ARG1:%.*]], [[ARG:%.*]]
+; CHECK-NEXT: [[I4:%.*]] = xor i1 [[TMP0]], true
+; CHECK-NEXT: ret i1 [[I4]]
+;
+bb:
+ %i = zext i1 %arg to i32
+ %i2 = zext i1 %arg1 to i32
+ %i3 = add i32 %i2, %i
+ %i4 = icmp eq i32 %i3, 0
+ ret i1 %i4
+}
+
+define i1 @cvt_icmp_1_zext_plus_zext_eq(i1 %arg, i1 %arg1) {
+; CHECK-LABEL: @cvt_icmp_1_zext_plus_zext_eq(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[I4:%.*]] = xor i1 [[ARG1:%.*]], [[ARG:%.*]]
+; CHECK-NEXT: ret i1 [[I4]]
+;
+bb:
+ %i = zext i1 %arg to i32
+ %i2 = zext i1 %arg1 to i32
+ %i3 = add i32 %i2, %i
+ %i4 = icmp eq i32 %i3, 1
+ ret i1 %i4
+}
+
+define i1 @cvt_icmp_2_zext_plus_zext_eq(i1 %arg, i1 %arg1) {
+; CHECK-LABEL: @cvt_icmp_2_zext_plus_zext_eq(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[T:%.*]] = and i1 [[ARG:%.*]], [[ARG1:%.*]]
+; CHECK-NEXT: ret i1 [[T]]
+;
+bb:
+ %i = zext i1 %arg to i32
+ %i2 = zext i1 %arg1 to i32
+ %i3 = add i32 %i, %i2
+ %t = icmp eq i32 %i3, 2
+ ret i1 %t
+}
+
+define i1 @cvt_icmp_0_sext_plus_sext_eq(i1 %arg, i1 %arg1) {
+; CHECK-LABEL: @cvt_icmp_0_sext_plus_sext_eq(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[TMP0:%.*]] = or i1 [[ARG:%.*]], [[ARG1:%.*]]
+; CHECK-NEXT: [[T:%.*]] = xor i1 [[TMP0]], true
+; CHECK-NEXT: ret i1 [[T]]
+;
+bb:
+ %i = sext i1 %arg to i32
+ %i2 = sext i1 %arg1 to i32
+ %i3 = add i32 %i, %i2
+ %t = icmp eq i32 %i3, 0
+ ret i1 %t
+}
+
+define i1 @cvt_icmp_1_sext_plus_sext_eq(i1 %arg, i1 %arg1) {
+; CHECK-LABEL: @cvt_icmp_1_sext_plus_sext_eq(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: ret i1 false
+;
+bb:
+ %i = sext i1 %arg to i32
+ %i2 = sext i1 %arg1 to i32
+ %i3 = add i32 %i, %i2
+ %t = icmp eq i32 %i3, 1
+ ret i1 %t
+}
+
+define i1 @cvt_icmp_2_sext_plus_sext_eq(i1 %arg, i1 %arg1) {
+; CHECK-LABEL: @cvt_icmp_2_sext_plus_sext_eq(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: ret i1 false
+;
+bb:
+ %i = sext i1 %arg to i32
+ %i2 = sext i1 %arg1 to i32
+ %i3 = add i32 %i, %i2
+ %t = icmp eq i32 %i3, 2
+ ret i1 %t
+}
+
+define i1 @cvt_icmp_0_sext_plus_zext_eq(i1 %arg, i1 %arg1) {
+; CHECK-LABEL: @cvt_icmp_0_sext_plus_zext_eq(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[ARG:%.*]], [[ARG1:%.*]]
+; CHECK-NEXT: [[T:%.*]] = xor i1 [[TMP0]], true
+; CHECK-NEXT: ret i1 [[T]]
+;
+bb:
+ %i = sext i1 %arg to i32
+ %i2 = zext i1 %arg1 to i32
+ %i3 = add i32 %i, %i2
+ %t = icmp eq i32 %i3, 0
+ ret i1 %t
+}
+
+define i1 @cvt_icmp_1_sext_plus_zext_eq(i1 %arg, i1 %arg1) {
+; CHECK-LABEL: @cvt_icmp_1_sext_plus_zext_eq(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[ARG:%.*]], true
+; CHECK-NEXT: [[T:%.*]] = and i1 [[TMP0]], [[ARG1:%.*]]
+; CHECK-NEXT: ret i1 [[T]]
+;
+bb:
+ %i = sext i1 %arg to i32
+ %i2 = zext i1 %arg1 to i32
+ %i3 = add i32 %i, %i2
+ %t = icmp eq i32 %i3, 1
+ ret i1 %t
+}
+
+define i1 @cvt_icmp_2_sext_plus_zext_eq(i1 %arg, i1 %arg1) {
+; CHECK-LABEL: @cvt_icmp_2_sext_plus_zext_eq(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: ret i1 false
+;
+bb:
+ %i = sext i1 %arg to i32
+ %i2 = zext i1 %arg1 to i32
+ %i3 = add i32 %i, %i2
+ %t = icmp eq i32 %i3, 2
+ ret i1 %t
+}
+
+define i1 @cvt_icmp_0_zext_plus_zext_ne(i1 %arg, i1 %arg1) {
+; CHECK-LABEL: @cvt_icmp_0_zext_plus_zext_ne(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[I4:%.*]] = or i1 [[ARG1:%.*]], [[ARG:%.*]]
+; CHECK-NEXT: ret i1 [[I4]]
+;
+bb:
+ %i = zext i1 %arg to i32
+ %i2 = zext i1 %arg1 to i32
+ %i3 = add i32 %i2, %i
+ %i4 = icmp ne i32 %i3, 0
+ ret i1 %i4
+}
+
+define i1 @cvt_icmp_1_zext_plus_zext_ne(i1 %arg, i1 %arg1) {
+; CHECK-LABEL: @cvt_icmp_1_zext_plus_zext_ne(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[ARG1:%.*]], [[ARG:%.*]]
+; CHECK-NEXT: [[I4:%.*]] = xor i1 [[TMP0]], true
+; CHECK-NEXT: ret i1 [[I4]]
+;
+bb:
+ %i = zext i1 %arg to i32
+ %i2 = zext i1 %arg1 to i32
+ %i3 = add i32 %i2, %i
+ %i4 = icmp ne i32 %i3, 1
+ ret i1 %i4
+}
+
+define i1 @cvt_icmp_2_zext_plus_zext_ne(i1 %arg, i1 %arg1) {
+; CHECK-LABEL: @cvt_icmp_2_zext_plus_zext_ne(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[TMP0:%.*]] = and i1 [[ARG:%.*]], [[ARG1:%.*]]
+; CHECK-NEXT: [[T:%.*]] = xor i1 [[TMP0]], true
+; CHECK-NEXT: ret i1 [[T]]
+;
+bb:
+ %i = zext i1 %arg to i32
+ %i2 = zext i1 %arg1 to i32
+ %i3 = add i32 %i, %i2
+ %t = icmp ne i32 %i3, 2
+ ret i1 %t
+}
+
+define i1 @cvt_icmp_0_sext_plus_sext_ne(i1 %arg, i1 %arg1) {
+; CHECK-LABEL: @cvt_icmp_0_sext_plus_sext_ne(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[T:%.*]] = or i1 [[ARG:%.*]], [[ARG1:%.*]]
+; CHECK-NEXT: ret i1 [[T]]
+;
+bb:
+ %i = sext i1 %arg to i32
+ %i2 = sext i1 %arg1 to i32
+ %i3 = add i32 %i, %i2
+ %t = icmp ne i32 %i3, 0
+ ret i1 %t
+}
+
+define i1 @cvt_icmp_1_sext_plus_sext_ne(i1 %arg, i1 %arg1) {
+; CHECK-LABEL: @cvt_icmp_1_sext_plus_sext_ne(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: ret i1 true
+;
+bb:
+ %i = sext i1 %arg to i32
+ %i2 = sext i1 %arg1 to i32
+ %i3 = add i32 %i, %i2
+ %t = icmp ne i32 %i3, 1
+ ret i1 %t
+}
+
+define i1 @cvt_icmp_2_sext_plus_sext_ne(i1 %arg, i1 %arg1) {
+; CHECK-LABEL: @cvt_icmp_2_sext_plus_sext_ne(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: ret i1 true
+;
+bb:
+ %i = sext i1 %arg to i32
+ %i2 = sext i1 %arg1 to i32
+ %i3 = add i32 %i, %i2
+ %t = icmp ne i32 %i3, 2
+ ret i1 %t
+}
+
+define i1 @cvt_icmp_0_sext_plus_zext_ne(i1 %arg, i1 %arg1) {
+; CHECK-LABEL: @cvt_icmp_0_sext_plus_zext_ne(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[T:%.*]] = xor i1 [[ARG:%.*]], [[ARG1:%.*]]
+; CHECK-NEXT: ret i1 [[T]]
+;
+bb:
+ %i = sext i1 %arg to i32
+ %i2 = zext i1 %arg1 to i32
+ %i3 = add i32 %i, %i2
+ %t = icmp ne i32 %i3, 0
+ ret i1 %t
+}
+
+define i1 @cvt_icmp_1_sext_plus_zext_ne(i1 %arg, i1 %arg1) {
+; CHECK-LABEL: @cvt_icmp_1_sext_plus_zext_ne(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[ARG1:%.*]], true
+; CHECK-NEXT: [[T:%.*]] = or i1 [[TMP0]], [[ARG:%.*]]
+; CHECK-NEXT: ret i1 [[T]]
+;
+bb:
+ %i = sext i1 %arg to i32
+ %i2 = zext i1 %arg1 to i32
+ %i3 = add i32 %i, %i2
+ %t = icmp ne i32 %i3, 1
+ ret i1 %t
+}
+
+define i1 @cvt_icmp_2_sext_plus_zext_ne(i1 %arg, i1 %arg1) {
+; CHECK-LABEL: @cvt_icmp_2_sext_plus_zext_ne(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: ret i1 true
+;
+bb:
+ %i = sext i1 %arg to i32
+ %i2 = zext i1 %arg1 to i32
+ %i3 = add i32 %i, %i2
+ %t = icmp ne i32 %i3, 2
+ ret i1 %t
+}
+
+; test if zext i1 X + sext i1 Y converted to sext i1 X + zext i1 Y
+; and then processed
+
+define i1 @cvt_icmp_0_zext_plus_sext_eq(i1 %arg, i1 %arg1) {
+; CHECK-LABEL: @cvt_icmp_0_zext_plus_sext_eq(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[ARG1:%.*]], [[ARG:%.*]]
+; CHECK-NEXT: [[T:%.*]] = xor i1 [[TMP0]], true
+; CHECK-NEXT: ret i1 [[T]]
+;
+bb:
+ %i = zext i1 %arg to i32
+ %i2 = sext i1 %arg1 to i32
+ %i3 = add i32 %i, %i2
+ %t = icmp eq i32 %i3, 0
+ ret i1 %t
+}
+
+define i1 @cvt_icmp_1_zext_plus_sext_eq(i1 %arg, i1 %arg1) {
+; CHECK-LABEL: @cvt_icmp_1_zext_plus_sext_eq(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[ARG1:%.*]], true
+; CHECK-NEXT: [[T:%.*]] = and i1 [[TMP0]], [[ARG:%.*]]
+; CHECK-NEXT: ret i1 [[T]]
+;
+bb:
+ %i = zext i1 %arg to i32
+ %i2 = sext i1 %arg1 to i32
+ %i3 = add i32 %i, %i2
+ %t = icmp eq i32 %i3, 1
+ ret i1 %t
+}
+
+define i1 @cvt_icmp_2_zext_plus_sext_eq(i1 %arg, i1 %arg1) {
+; CHECK-LABEL: @cvt_icmp_2_zext_plus_sext_eq(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: ret i1 false
+;
+bb:
+ %i = zext i1 %arg to i32
+ %i2 = sext i1 %arg1 to i32
+ %i3 = add i32 %i, %i2
+ %t = icmp eq i32 %i3, 2
+ ret i1 %t
+}
+
+define i1 @cvt_icmp_0_zext_plus_sext_ne(i1 %arg, i1 %arg1) {
+; CHECK-LABEL: @cvt_icmp_0_zext_plus_sext_ne(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[T:%.*]] = xor i1 [[ARG1:%.*]], [[ARG:%.*]]
+; CHECK-NEXT: ret i1 [[T]]
+;
+bb:
+ %i = zext i1 %arg to i32
+ %i2 = sext i1 %arg1 to i32
+ %i3 = add i32 %i, %i2
+ %t = icmp ne i32 %i3, 0
+ ret i1 %t
+}
+
+define i1 @cvt_icmp_1_zext_plus_sext_ne(i1 %arg, i1 %arg1) {
+; CHECK-LABEL: @cvt_icmp_1_zext_plus_sext_ne(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[ARG:%.*]], true
+; CHECK-NEXT: [[T:%.*]] = or i1 [[TMP0]], [[ARG1:%.*]]
+; CHECK-NEXT: ret i1 [[T]]
+;
+bb:
+ %i = zext i1 %arg to i32
+ %i2 = sext i1 %arg1 to i32
+ %i3 = add i32 %i, %i2
+ %t = icmp ne i32 %i3, 1
+ ret i1 %t
+}
+
+define i1 @cvt_icmp_2_zext_plus_sext_ne(i1 %arg, i1 %arg1) {
+; CHECK-LABEL: @cvt_icmp_2_zext_plus_sext_ne(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: ret i1 true
+;
+bb:
+ %i = zext i1 %arg to i32
+ %i2 = sext i1 %arg1 to i32
+ %i3 = add i32 %i, %i2
+ %t = icmp ne i32 %i3, 2
+ ret i1 %t
+}
+
+; test zext/zext additions with more than one use
+
+define i1 @test_cvt_icmp1(i1 %arg, i1 %arg1, ptr %p) {
+; CHECK-LABEL: @test_cvt_icmp1(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[I:%.*]] = zext i1 [[ARG:%.*]] to i32
+; CHECK-NEXT: [[I2:%.*]] = zext i1 [[ARG]] to i32
+; CHECK-NEXT: store i32 [[I2]], ptr [[P:%.*]], align 4
+; CHECK-NEXT: [[I4:%.*]] = add nuw nsw i32 [[I2]], [[I]]
+; CHECK-NEXT: [[T:%.*]] = icmp eq i32 [[I4]], 1
+; CHECK-NEXT: ret i1 [[T]]
+;
+bb:
+ %i = zext i1 %arg to i32
+ %i2 = zext i1 %arg to i32
+ store i32 %i2, ptr %p
+ %i3 = load i32, ptr %p
+ %i4 = add i32 %i3, %i
+ %t = icmp eq i32 %i4, 1
+ ret i1 %t
+}
+
+define i1 @test_cvt_icmp2(i1 %arg, i1 %arg1, ptr %p) {
+; CHECK-LABEL: @test_cvt_icmp2(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[I2:%.*]] = zext i1 [[ARG:%.*]] to i32
+; CHECK-NEXT: store i32 [[I2]], ptr [[P:%.*]], align 4
+; CHECK-NEXT: ret i1 false
+;
+bb:
+ %i = sext i1 %arg to i32
+ %i2 = zext i1 %arg to i32
+ store i32 %i2, ptr %p
+ %i3 = load i32, ptr %p
+ %i4 = add i32 %i3, %i
+ %t = icmp eq i32 %i4, 1
+ ret i1 %t
+}
+
+; tests for negative comparisons
+define i1 @test_cvt_icmp3(i1 %arg, i1 %arg1) {
+; CHECK-LABEL: @test_cvt_icmp3(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: ret i1 false
+;
+bb:
+ %i = zext i1 %arg to i32
+ %i2 = zext i1 %arg1 to i32
+ %i3 = add i32 %i2, %i
+ %i4 = icmp ult i32 %i3, 0
+ ret i1 %i4
+}
+
+define i1 @test_cvt_icmp4(i1 %arg, i1 %arg1) {
+; CHECK-LABEL: @test_cvt_icmp4(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[I4:%.*]] = or i1 [[ARG1:%.*]], [[ARG:%.*]]
+; CHECK-NEXT: ret i1 [[I4]]
+;
+bb:
+ %i = zext i1 %arg to i32
+ %i2 = zext i1 %arg1 to i32
+ %i3 = add i32 %i2, %i
+ %i4 = icmp ugt i32 %i3, 0
+ ret i1 %i4
+}
+
+define i1 @test_cvt_icmp5(i1 %arg, i1 %arg1) {
+; CHECK-LABEL: @test_cvt_icmp5(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[I4:%.*]] = or i1 [[ARG1:%.*]], [[ARG:%.*]]
+; CHECK-NEXT: ret i1 [[I4]]
+;
+bb:
+ %i = zext i1 %arg to i32
+ %i2 = zext i1 %arg1 to i32
+ %i3 = add i32 %i2, %i
+ %i4 = icmp uge i32 %i3, 1
+ ret i1 %i4
+}
+
+define i1 @test_cvt_icmp6(i1 %arg, i1 %arg1) {
+; CHECK-LABEL: @test_cvt_icmp6(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[I:%.*]] = zext i1 [[ARG:%.*]] to i32
+; CHECK-NEXT: [[I2:%.*]] = zext i1 [[ARG1:%.*]] to i32
+; CHECK-NEXT: [[I3:%.*]] = add nuw nsw i32 [[I2]], [[I]]
+; CHECK-NEXT: [[I4:%.*]] = icmp ult i32 [[I3]], 2
+; CHECK-NEXT: ret i1 [[I4]]
+;
+bb:
+ %i = zext i1 %arg to i32
+ %i2 = zext i1 %arg1 to i32
+ %i3 = add i32 %i2, %i
+ %i4 = icmp ule i32 %i3, 1
+ ret i1 %i4
+}
+
+define i1 @test_cvt_icmp7(i1 %arg, i1 %arg1) {
+; CHECK-LABEL: @test_cvt_icmp7(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[I:%.*]] = zext i1 [[ARG:%.*]] to i32
+; CHECK-NEXT: [[I2:%.*]] = zext i1 [[ARG1:%.*]] to i32
+; CHECK-NEXT: [[I3:%.*]] = add nuw nsw i32 [[I2]], [[I]]
+; CHECK-NEXT: [[I4:%.*]] = icmp ugt i32 [[I3]], 1
+; CHECK-NEXT: ret i1 [[I4]]
+;
+bb:
+ %i = zext i1 %arg to i32
+ %i2 = zext i1 %arg1 to i32
+ %i3 = add i32 %i2, %i
+ %i4 = icmp sgt i32 %i3, 1
+ ret i1 %i4
+}
+
+define i1 @test_cvt_icmp8(i1 %arg, i1 %arg1) {
+; CHECK-LABEL: @test_cvt_icmp8(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[I4:%.*]] = or i1 [[ARG1:%.*]], [[ARG:%.*]]
+; CHECK-NEXT: ret i1 [[I4]]
+;
+bb:
+ %i = zext i1 %arg to i32
+ %i2 = zext i1 %arg1 to i32
+ %i3 = add i32 %i2, %i
+ %i4 = icmp sge i32 %i3, 1
+ ret i1 %i4
+}
+
+define i1 @test_cvt_icmp9(i1 %arg, i1 %arg1) {
+; CHECK-LABEL: @test_cvt_icmp9(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[TMP0:%.*]] = or i1 [[ARG1:%.*]], [[ARG:%.*]]
+; CHECK-NEXT: [[I4:%.*]] = xor i1 [[TMP0]], true
+; CHECK-NEXT: ret i1 [[I4]]
+;
+bb:
+ %i = zext i1 %arg to i32
+ %i2 = zext i1 %arg1 to i32
+ %i3 = add i32 %i2, %i
+ %i4 = icmp slt i32 %i3, 1
+ ret i1 %i4
+}
+
+define i1 @test_cvt_icmp10(i1 %arg, i1 %arg1) {
+; CHECK-LABEL: @test_cvt_icmp10(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[I:%.*]] = zext i1 [[ARG:%.*]] to i32
+; CHECK-NEXT: [[I2:%.*]] = zext i1 [[ARG1:%.*]] to i32
+; CHECK-NEXT: [[I3:%.*]] = add nuw nsw i32 [[I2]], [[I]]
+; CHECK-NEXT: [[I4:%.*]] = icmp ult i32 [[I3]], 2
+; CHECK-NEXT: ret i1 [[I4]]
+;
+bb:
+ %i = zext i1 %arg to i32
+ %i2 = zext i1 %arg1 to i32
+ %i3 = add i32 %i2, %i
+ %i4 = icmp sle i32 %i3, 1
+ ret i1 %i4
+}
+
+define i1 @test_cvt_icmp11(i1 %arg, i1 %arg1) {
+; CHECK-LABEL: @test_cvt_icmp11(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[I:%.*]] = sext i1 [[ARG:%.*]] to i32
+; CHECK-NEXT: [[I2:%.*]] = sext i1 [[ARG1:%.*]] to i32
+; CHECK-NEXT: [[I3:%.*]] = add nsw i32 [[I2]], [[I]]
+; CHECK-NEXT: [[I4:%.*]] = icmp ugt i32 [[I3]], 2
+; CHECK-NEXT: ret i1 [[I4]]
+;
+bb:
+ %i = sext i1 %arg to i32
+ %i2 = sext i1 %arg1 to i32
+ %i3 = add i32 %i2, %i
+ %i4 = icmp ugt i32 %i3, 2
+ ret i1 %i4
+}
+
+define i1 @test_cvt_icmp12(i1 %arg, i1 %arg1) {
+; CHECK-LABEL: @test_cvt_icmp12(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[I4:%.*]] = or i1 [[ARG1:%.*]], [[ARG:%.*]]
+; CHECK-NEXT: ret i1 [[I4]]
+;
+bb:
+ %i = sext i1 %arg to i32
+ %i2 = sext i1 %arg1 to i32
+ %i3 = add i32 %i2, %i
+ %i4 = icmp uge i32 %i3, 1
+ ret i1 %i4
+}
+
+define i1 @test_cvt_icmp13(i1 %arg, i1 %arg1) {
+; CHECK-LABEL: @test_cvt_icmp13(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[TMP0:%.*]] = or i1 [[ARG1:%.*]], [[ARG:%.*]]
+; CHECK-NEXT: [[I4:%.*]] = xor i1 [[TMP0]], true
+; CHECK-NEXT: ret i1 [[I4]]
+;
+bb:
+ %i = sext i1 %arg to i32
+ %i2 = sext i1 %arg1 to i32
+ %i3 = add i32 %i2, %i
+ %i4 = icmp ult i32 %i3, 1
+ ret i1 %i4
+}
+
+define i1 @test_cvt_icmp14(i1 %arg, i1 %arg1) {
+; CHECK-LABEL: @test_cvt_icmp14(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[I:%.*]] = sext i1 [[ARG:%.*]] to i32
+; CHECK-NEXT: [[I2:%.*]] = sext i1 [[ARG1:%.*]] to i32
+; CHECK-NEXT: [[I3:%.*]] = add nsw i32 [[I2]], [[I]]
+; CHECK-NEXT: [[I4:%.*]] = icmp ult i32 [[I3]], 3
+; CHECK-NEXT: ret i1 [[I4]]
+;
+bb:
+ %i = sext i1 %arg to i32
+ %i2 = sext i1 %arg1 to i32
+ %i3 = add i32 %i2, %i
+ %i4 = icmp ule i32 %i3, 2
+ ret i1 %i4
+}
+
+define i1 @test_cvt_icmp15(i1 %arg, i1 %arg1) {
+; CHECK-LABEL: @test_cvt_icmp15(
+; CHECK-N...
[truncated]
|
alive2 links: // zext i1 Op0 + zext i1 Op1 != 1 --> !(xor i1 Op0, Op1) // zext i1 Op0 + zext i1 Op1 != 2 --> !(and i1 Op0, Op1) // sext i1 Op0 + sext i1 Op1 != 0 --> or i1 Op0, Op1 // sext i1 Op0 + sext i1 Op1 != 1 --> true // sext i1 Op0 + sext i1 Op1 != 2 --> true // sext i1 Op0 + zext i1 Op1 != 0 --> xor i1 Op0, Op1 // sext i1 Op0 + zext i1 Op1 != 1 --> Op0 | (!Op1) // sext i1 Op0 + zext i1 Op1 != 2 --> true |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As a high-level comment, I'm not a big fan of this long list of special cases. An alternative approach to this would be to construct the truth table for this explicitly (i.e. evaluate icmp pred (ext1 x), (ext2 y)
explicitly for all 4 x, y) and then use a helper that converts a binary truth table into logic.
Unfortunately, I don't think we have an existing helper to convert a truth table to logic (I think @goldsteinn wrote something for vternlog, which is not quite what we'd need here).
Do we have functions for building truth tables? |
Hi! I wrote a Python script to generate the lookup table.
|
@dtcxzyw So you fixed the issue? |
You can use the script/generated code in this PR. |
How should I use it? |
@dtcxzyw I don't see how the lookup-table based implementation improves this code. It makes it impossible to understand or verify the logic behind the transform. |
I will add a compile-time helper for this PR. Then we can write a constexpr binary function and match the truth tables. |
BTW, D143373 does a similar transformation with this PR. We should put the logic in |
To expand on my initial comment, what I had in mind is something like this (completely untested, probably will not compile): if (match(Add, m_c_Add(m_CombineAnd(m_Instruction(Ext0), m_OneUse(m_ZExtOrSExt(m_Value(Op0)))),
m_CombineAnd(m_Instruction(Ext1), m_OneUse(m_ZExtOrSExt(m_Value(Op1)))))) &&
Op0->getType()->isIntOrIntVectorTy(1) &&
Op1->getType()->isIntOrIntVectorTy(1)) {
std::bitset<4> Table;
auto ComputeTable = [&](bool Op0Val, bool Op1Val) {
int Res = 0;
if (Op0Val)
Res += isa<ZExtInst>(Ext0) ? 1 : -1;
if (Op1Val)
Res += isa<ZExtInst>(Ext1) ? 1 : -1;
return ICmpInst::compare(APInt(BW, Res, true), C, Pred);
};
Table[0] = ComputeTable(false, false);
Table[1] = ComputeTable(false, true);
Table[2] = ComputeTable(true, false);
Table[3] = ComputeTable(true, true);
return createLogicFromTable(Table, Op0, Op1);
}
// ...
static Value *createLogicFromTable(const std::bitset<4> &Table, Value *Op0, Value *Op1, IRBuilderBase &Builder) {
switch (Table.to_ulong()) {
case 0: // 0 0 0 0
return Builder.getFalse();
// etc.
case 6: // 0 1 1 0
return Builder.CreateXor(Op0, Op1);
// etc.
case 8: // 1 0 0 0
return Builder.CreateAnd(Op0, Op1);
// etc.
case 14: // 1 1 1 0
return Builder.CreateOr(Op0, Op1);
case 15: // 1 1 1 1
return Builder.getTrue();
}
} The computation of the logic table is pretty straightforward and can be done explicitly, without a static lookup table. The conversion from the table to logic is generic and not specific to this transform. This approach also automatically works with all predicates, not just equality predicates, so I think this would also subsume the code from D143373 linked above. (This approach would also be easy to generalize to binary operands other than |
I will update the pull, and add the lookup table. |
@nikic Just to make sure that I didn't miss something. |
@elhewaty Given that the implementation is predicate-independent, I don't think you need the complete test matrix, just some representative tests for other predicates. Also keep in mind that uge, ule, sge, sle will be canonicalized to ugt, ult, sgt, slt, so it will not be possible to test those predicates in the first place. |
@nikic what does m_CombineAnd function do? |
It requires that both arguments match. Here it is used to capture the sext/zext into a variable. You could also just use |
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you please try removing the code that was added in D143373? As far as I understand, your fold should fully subsume the old one.
@dtcxzyw Thanks, I will update the pull now. |
I recommend you work on this PR to track the review progress. Don't worry about creating a bunch of commits since they will be squashed before merging. |
b0db0ba
to
e1f3504
Compare
e1f3504
to
faef53a
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. I will test on my machine to reproduce this crash.
@dtcxzyw Are you one of the reviewers or code owners? |
No. But I am willing to review PRs related to the middle-end/RISC-V backend :) |
Should we add some multi-use/vector tests? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks basically good to me. Only one suggestion to relax the one-use checks.
Can you please squash the changes such that all the test additions are in the first commit and the implementation (and test diffs) are in the second one?
I'd suggest to also add a (negative) test where the sext/zext is from something other than i1. (Unless it already exists and I missed it...) |
5df3c25
to
9fa7f6b
Compare
@nickc If any further modifications are needed, I will make them, but not in the next few weeks, as I am busy with other things. Please keep the issue assigned to me and I will complete the work. |
9fa7f6b
to
14e6425
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Fixes #64859.