-
Notifications
You must be signed in to change notification settings - Fork 12.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[InstCombine] Fold `(ct{t,l}z Pow2)` -> `Log2(Pow2)`
#122620
base: main
Are you sure you want to change the base?
Conversation
Do this so we can find `Log2(Pow2)` for "free" with `takeLog2`.
@llvm/pr-subscribers-llvm-transforms Author: None (goldsteinn) Changes
Full diff: https://github.com/llvm/llvm-project/pull/122620.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 7454382412369f..94240773f46a80 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -588,6 +588,22 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
}
}
+ // cttz(Pow2) -> Log2(Pow2)
+ // ctlz(Pow2) -> BitWidth - 1 - Log2(Pow2)
+ if (IsTZ || II.hasOneUse()) {
+ if (auto *R = IC.tryGetLog2(Op0, match(Op1, m_One()))) {
+ if (IsTZ)
+ return IC.replaceInstUsesWith(II, R);
+ BinaryOperator *BO = BinaryOperator::CreateSub(
+ ConstantInt::get(R->getType(),
+ R->getType()->getScalarSizeInBits() - 1),
+ R);
+ BO->setHasNoSignedWrap();
+ BO->setHasNoUnsignedWrap();
+ return BO;
+ }
+ }
+
KnownBits Known = IC.computeKnownBits(Op0, 0, &II);
// Create a mask for bits above (ctlz) or below (cttz) the first known one.
diff --git a/llvm/test/Transforms/InstCombine/cttz.ll b/llvm/test/Transforms/InstCombine/cttz.ll
index cb0bc59ae79958..5717e352c81e13 100644
--- a/llvm/test/Transforms/InstCombine/cttz.ll
+++ b/llvm/test/Transforms/InstCombine/cttz.ll
@@ -297,3 +297,72 @@ define i16 @cttz_assume(i16 %x) {
%cttz = call i16 @llvm.cttz.i16(i16 %x, i1 false)
ret i16 %cttz
}
+
+
+declare void @use.i8(i8)
+define i8 @fold_ctz_log2(i8 %x) {
+; CHECK-LABEL: @fold_ctz_log2(
+; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.umin.i8(i8 [[X:%.*]], i8 5)
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %p2 = shl i8 1, %x
+ %v = call i8 @llvm.umin(i8 %p2, i8 32)
+ %r = call i8 @llvm.cttz(i8 %v, i1 false)
+ ret i8 %r
+}
+
+define i8 @fold_ctz_log2_maybe_z(i8 %x, i8 %y, i1 %c) {
+; CHECK-LABEL: @fold_ctz_log2_maybe_z(
+; CHECK-NEXT: [[V:%.*]] = shl i8 2, [[V_V:%.*]]
+; CHECK-NEXT: [[P2_2:%.*]] = shl i8 4, [[Y:%.*]]
+; CHECK-NEXT: [[V1:%.*]] = select i1 [[C:%.*]], i8 [[V]], i8 [[P2_2]]
+; CHECK-NEXT: [[R:%.*]] = call range(i8 1, 9) i8 @llvm.cttz.i8(i8 [[V1]], i1 false)
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %p2 = shl i8 2, %x
+ %p2_2 = shl i8 4, %y
+ %v = select i1 %c, i8 %p2, i8 %p2_2
+ %r = call i8 @llvm.cttz(i8 %v, i1 false)
+ ret i8 %r
+}
+
+define i8 @fold_ctz_log2_maybe_z_okay(i8 %x, i8 %y, i1 %c) {
+; CHECK-LABEL: @fold_ctz_log2_maybe_z_okay(
+; CHECK-NEXT: [[X:%.*]] = add i8 [[X1:%.*]], 1
+; CHECK-NEXT: [[Y:%.*]] = add i8 [[Y1:%.*]], 2
+; CHECK-NEXT: [[V_V:%.*]] = select i1 [[C:%.*]], i8 [[X]], i8 [[Y]]
+; CHECK-NEXT: ret i8 [[V_V]]
+;
+ %p2 = shl i8 2, %x
+ %p2_2 = shl i8 4, %y
+ %v = select i1 %c, i8 %p2, i8 %p2_2
+ %r = call i8 @llvm.cttz(i8 %v, i1 true)
+ ret i8 %r
+}
+
+define i8 @fold_clz_log2(i8 %x) {
+; CHECK-LABEL: @fold_clz_log2(
+; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.umin.i8(i8 [[X:%.*]], i8 5)
+; CHECK-NEXT: [[R:%.*]] = xor i8 [[TMP1]], 7
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %p2 = shl i8 1, %x
+ %v = call i8 @llvm.umin(i8 %p2, i8 32)
+ %r = call i8 @llvm.ctlz(i8 %v, i1 false)
+ ret i8 %r
+}
+
+define i8 @fold_clz_log2_fail_multi_use(i8 %x) {
+; CHECK-LABEL: @fold_clz_log2_fail_multi_use(
+; CHECK-NEXT: [[P2:%.*]] = shl nuw i8 1, [[X:%.*]]
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.umin.i8(i8 [[P2]], i8 32)
+; CHECK-NEXT: [[R:%.*]] = call range(i8 2, 9) i8 @llvm.ctlz.i8(i8 [[V]], i1 true)
+; CHECK-NEXT: call void @use.i8(i8 [[R]])
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %p2 = shl i8 1, %x
+ %v = call i8 @llvm.umin(i8 %p2, i8 32)
+ %r = call i8 @llvm.ctlz(i8 %v, i1 false)
+ call void @use.i8(i8 %r)
+ ret i8 %r
+}
|
`(ct{t,l}z Pow2)` -> `Log2(Pow2)`
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
@@ -588,6 +588,22 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) { | |||
} | |||
} | |||
|
|||
// cttz(Pow2) -> Log2(Pow2) | |||
// ctlz(Pow2) -> BitWidth - 1 - Log2(Pow2) | |||
if (IsTZ || II.hasOneUse()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why does the number of uses of the intrinsic matter?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oops. I guess @goldsteinn means `Op0->hasOneUse()`.
%v = call i8 @llvm.umin(i8 %p2, i8 32) | ||
%r = call i8 @llvm.ctlz(i8 %v, i1 false) | ||
ret i8 %r | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add a test with non-pow2 bit width please.
`(ct{t,l}z Pow2)`; NFC
`(ct{t,l}z Pow2)` -> `Log2(Pow2)`
Do this so we can find `Log2(Pow2)` for "free" with `takeLog2`.
https://alive2.llvm.org/ce/z/CL77fo