Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FuncSpec] Update function specialization to handle phi-chains #72903

Merged
merged 3 commits into from
Nov 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,22 @@ class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
Cost estimateSwitchInst(SwitchInst &I);
Cost estimateBranchInst(BranchInst &I);

// Transitively Incoming Values (TIV) is a set of Values that can "feed" a
// value to the initial PHI-node. It is defined like this:
//
// * the initial PHI-node belongs to TIV.
//
// * for every PHI-node in TIV, its operands belong to TIV
//
// If TIV for the initial PHI-node (P) contains more than one constant or a
// value that is not a PHI-node, then P cannot be folded to a constant.
//
// As soon as we detect these cases, we bail, without constructing the
// full TIV.
// Otherwise P can be folded to the one constant in TIV.
bool discoverTransitivelyIncomingValues(Constant *Const, PHINode *Root,
DenseSet<PHINode *> &TransitivePHIs);

Constant *visitInstruction(Instruction &I) { return nullptr; }
Constant *visitPHINode(PHINode &I);
Constant *visitFreezeInst(FreezeInst &I);
Expand Down
108 changes: 94 additions & 14 deletions llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,17 @@ static cl::opt<unsigned> MaxClones(
"The maximum number of clones allowed for a single function "
"specialization"));

static cl::opt<unsigned>
MaxDiscoveryIterations("funcspec-max-discovery-iterations", cl::init(100),
cl::Hidden,
cl::desc("The maximum number of iterations allowed "
"when searching for transitive "
"phis"));

static cl::opt<unsigned> MaxIncomingPhiValues(
"funcspec-max-incoming-phi-values", cl::init(4), cl::Hidden, cl::desc(
"The maximum number of incoming values a PHI node can have to be "
"considered during the specialization bonus estimation"));
"funcspec-max-incoming-phi-values", cl::init(8), cl::Hidden,
cl::desc("The maximum number of incoming values a PHI node can have to be "
"considered during the specialization bonus estimation"));

static cl::opt<unsigned> MaxBlockPredecessors(
"funcspec-max-block-predecessors", cl::init(2), cl::Hidden, cl::desc(
Expand All @@ -64,9 +71,9 @@ static cl::opt<unsigned> MinCodeSizeSavings(
"much percent of the original function size"));

static cl::opt<unsigned> MinLatencySavings(
"funcspec-min-latency-savings", cl::init(70), cl::Hidden, cl::desc(
"Reject specializations whose latency savings are less than this"
"much percent of the original function size"));
"funcspec-min-latency-savings", cl::init(40), cl::Hidden,
cl::desc("Reject specializations whose latency savings are less than this"
"much percent of the original function size"));

static cl::opt<unsigned> MinInliningBonus(
"funcspec-min-inlining-bonus", cl::init(300), cl::Hidden, cl::desc(
Expand Down Expand Up @@ -262,29 +269,102 @@ Cost InstCostVisitor::estimateBranchInst(BranchInst &I) {
return estimateBasicBlocks(WorkList);
}

bool InstCostVisitor::discoverTransitivelyIncomingValues(
Constant *Const, PHINode *Root, DenseSet<PHINode *> &TransitivePHIs) {

SmallVector<PHINode *, 64> WorkList;
WorkList.push_back(Root);
unsigned Iter = 0;

while (!WorkList.empty()) {
PHINode *PN = WorkList.pop_back_val();

if (++Iter > MaxDiscoveryIterations ||
PN->getNumIncomingValues() > MaxIncomingPhiValues)
return false;

if (!TransitivePHIs.insert(PN).second)
continue;

for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
Value *V = PN->getIncomingValue(I);

// Disregard self-references and dead incoming values.
if (auto *Inst = dyn_cast<Instruction>(V))
if (Inst == PN || DeadBlocks.contains(PN->getIncomingBlock(I)))
continue;

if (Constant *C = findConstantFor(V, KnownConstants)) {
// Not all incoming values are the same constant. Bail immediately.
if (C != Const)
return false;
continue;
}

if (auto *Phi = dyn_cast<PHINode>(V)) {
WorkList.push_back(Phi);
continue;
}

// We can't reason about anything else.
return false;
}
}
return true;
}

Constant *InstCostVisitor::visitPHINode(PHINode &I) {
if (I.getNumIncomingValues() > MaxIncomingPhiValues)
return nullptr;

bool Inserted = VisitedPHIs.insert(&I).second;
Constant *Const = nullptr;
bool HaveSeenIncomingPHI = false;

for (unsigned Idx = 0, E = I.getNumIncomingValues(); Idx != E; ++Idx) {
Value *V = I.getIncomingValue(Idx);

// Disregard self-references and dead incoming values.
if (auto *Inst = dyn_cast<Instruction>(V))
if (Inst == &I || DeadBlocks.contains(I.getIncomingBlock(Idx)))
continue;
Constant *C = findConstantFor(V, KnownConstants);
if (!C) {
if (Inserted)
PendingPHIs.push_back(&I);
return nullptr;

if (Constant *C = findConstantFor(V, KnownConstants)) {
if (!Const)
Const = C;
// Not all incoming values are the same constant. Bail immediately.
if (C != Const)
return nullptr;
continue;
}
if (!Const)
Const = C;
else if (C != Const)

if (Inserted) {
// First time we are seeing this phi. We will retry later, after
// all the constant arguments have been propagated. Bail for now.
PendingPHIs.push_back(&I);
return nullptr;
}

if (isa<PHINode>(V)) {
// Perhaps it is a Transitive Phi. We will confirm later.
HaveSeenIncomingPHI = true;
continue;
}

// We can't reason about anything else.
return nullptr;
}

if (!Const)
return nullptr;

if (!HaveSeenIncomingPHI)
return Const;

DenseSet<PHINode *> TransitivePHIs;
if (!discoverTransitivelyIncomingValues(Const, &I, TransitivePHIs))
return nullptr;

return Const;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
;
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=20 -funcspec-for-literal-constant -S < %s | FileCheck %s --check-prefix=FUNCSPEC
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=20 -funcspec-for-literal-constant -funcspec-max-discovery-iterations=16 -S < %s | FileCheck %s --check-prefix=NOFUNCSPEC

define i64 @bar(i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5, i1 %c6, i1 %c7, i1 %c8, i1 %c9, i1 %c10) {
; FUNCSPEC-LABEL: define i64 @bar(
; FUNCSPEC-SAME: i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]], i1 [[C4:%.*]], i1 [[C5:%.*]], i1 [[C6:%.*]], i1 [[C7:%.*]], i1 [[C8:%.*]], i1 [[C9:%.*]], i1 [[C10:%.*]]) {
; FUNCSPEC-NEXT: entry:
; FUNCSPEC-NEXT: [[F1:%.*]] = call i64 @foo.specialized.1(i64 3, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]), !range [[RNG0:![0-9]+]]
; FUNCSPEC-NEXT: [[F2:%.*]] = call i64 @foo.specialized.2(i64 4, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]), !range [[RNG1:![0-9]+]]
; FUNCSPEC-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[F1]], [[F2]]
; FUNCSPEC-NEXT: ret i64 [[ADD]]
;
; NOFUNCSPEC-LABEL: define i64 @bar(
; NOFUNCSPEC-SAME: i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]], i1 [[C4:%.*]], i1 [[C5:%.*]], i1 [[C6:%.*]], i1 [[C7:%.*]], i1 [[C8:%.*]], i1 [[C9:%.*]], i1 [[C10:%.*]]) {
; NOFUNCSPEC-NEXT: entry:
; NOFUNCSPEC-NEXT: [[F1:%.*]] = call i64 @foo(i64 3, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]), !range [[RNG0:![0-9]+]]
; NOFUNCSPEC-NEXT: [[F2:%.*]] = call i64 @foo(i64 4, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]], i1 [[C5]], i1 [[C6]], i1 [[C7]], i1 [[C8]], i1 [[C9]], i1 [[C10]]), !range [[RNG0]]
; NOFUNCSPEC-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[F1]], [[F2]]
; NOFUNCSPEC-NEXT: ret i64 [[ADD]]
;
entry:
%f1 = call i64 @foo(i64 3, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5, i1 %c6, i1 %c7, i1 %c8, i1 %c9, i1 %c10)
%f2 = call i64 @foo(i64 4, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5, i1 %c6, i1 %c7, i1 %c8, i1 %c9, i1 %c10)
%add = add i64 %f1, %f2
ret i64 %add
}

define internal i64 @foo(i64 %n, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5, i1 %c6, i1 %c7, i1 %c8, i1 %c9, i1 %c10) {
entry:
br i1 %c1, label %l1, label %l9

l1:
%phi1 = phi i64 [ %n, %entry ], [ %phi2, %l2 ]
%add = add i64 %phi1, 1
%div = sdiv i64 %add, 2
br i1 %c2, label %l1_5, label %exit

l1_5:
br i1 %c3, label %l1_75, label %l6

l1_75:
br i1 %c4, label %l2, label %l3

l2:
%phi2 = phi i64 [ %phi1, %l1_75 ], [ %phi3, %l3 ]
br label %l1

l3:
%phi3 = phi i64 [ %phi1, %l1_75 ], [ %phi4, %l4 ]
br label %l2

l4:
%phi4 = phi i64 [ %phi5, %l5 ], [ %phi6, %l6 ]
br i1 %c5, label %l3, label %l6

l5:
%phi5 = phi i64 [ %phi6, %l6_5 ], [ %phi7, %l7 ]
br label %l4

l6:
%phi6 = phi i64 [ %phi4, %l4 ], [ %phi1, %l1_5 ]
br i1 %c6, label %l4, label %l6_5

l6_5:
br i1 %c7, label %l5, label %l8

l7:
%phi7 = phi i64 [ %phi9, %l9 ], [ %phi8, %l8 ]
br i1 %c8, label %l5, label %l8

l8:
%phi8 = phi i64 [ %phi6, %l6_5 ], [ %phi7, %l7 ]
br i1 %c9, label %l7, label %l9

l9:
%phi9 = phi i64 [ %n, %entry ], [ %phi8, %l8 ]
%sub = sub i64 %phi9, 1
%mul = mul i64 %sub, 2
br i1 %c10, label %l7, label %exit

exit:
%res = phi i64 [ %div, %l1 ], [ %mul, %l9]
ret i64 %res
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=10 -funcspec-for-literal-constant -S < %s | FileCheck %s

define i64 @bar(i1 %c1, i1 %c2, i1 %c3, i1 %c4, i64 %x1) {
; CHECK-LABEL: define i64 @bar(
; CHECK-SAME: i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]], i1 [[C4:%.*]], i64 [[X1:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[F1:%.*]] = call i64 @foo.specialized.1(i64 3, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]])
; CHECK-NEXT: [[F2:%.*]] = call i64 @foo(i64 [[X1]], i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]])
; CHECK-NEXT: [[ADD:%.*]] = add i64 [[F1]], [[F2]]
; CHECK-NEXT: ret i64 [[ADD]]
;
entry:
%f1 = call i64 @foo(i64 3, i1 %c1, i1 %c2, i1 %c3, i1 %c4)
%f2 = call i64 @foo(i64 %x1, i1 %c1, i1 %c2, i1 %c3, i1 %c4)
%add = add i64 %f1, %f2
ret i64 %add
}

define internal i64 @foo(i64 %n, i1 %c1, i1 %c2, i1 %c3, i1 %c4) {
entry:
br label %l0

l1:
%phi1 = phi i64 [ %phi0, %l0 ], [ %phi2, %l2 ]
%add = add i64 %phi1, 1
%div = sdiv i64 %add, 2
br i1 %c2, label %l2, label %exit

l2:
%phi2 = phi i64 [ %phi0, %l0 ], [ %phi1, %l1 ]
%sub = sub i64 %phi2, 1
%mul = mul i64 %sub, 2
br i1 %c4, label %l1, label %exit

l0:
%phi0 = phi i64 [ %n, %entry ]
br i1 %c1, label %l1, label %l2

exit:
%res = phi i64 [ %div, %l1 ], [ %mul, %l2]
ret i64 %res
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=10 -funcspec-for-literal-constant -S < %s | FileCheck %s

define i64 @bar(i1 %c1, i1 %c2, i1 %c3, i1 %c4, i64 %x1) {
; CHECK-LABEL: define i64 @bar(
; CHECK-SAME: i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]], i1 [[C4:%.*]], i64 [[X1:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[F1:%.*]] = call i64 @foo(i64 3, i64 4, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]])
; CHECK-NEXT: [[F2:%.*]] = call i64 @foo(i64 4, i64 [[X1]], i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]])
; CHECK-NEXT: [[F3:%.*]] = call i64 @foo.specialized.1(i64 3, i64 3, i1 [[C1]], i1 [[C2]], i1 [[C3]], i1 [[C4]])
; CHECK-NEXT: [[ADD:%.*]] = add i64 [[F1]], [[F2]]
; CHECK-NEXT: [[ADD2:%.*]] = add i64 [[ADD]], [[F3]]
; CHECK-NEXT: ret i64 [[ADD2]]
;
entry:
%f1 = call i64 @foo(i64 3, i64 4, i1 %c1, i1 %c2, i1 %c3, i1 %c4)
%f2 = call i64 @foo(i64 4, i64 %x1, i1 %c1, i1 %c2, i1 %c3, i1 %c4)
%f3 = call i64 @foo(i64 3, i64 3, i1 %c1, i1 %c2, i1 %c3, i1 %c4)
%add = add i64 %f1, %f2
%add2 = add i64 %add, %f3
ret i64 %add2
}

define internal i64 @foo(i64 %n, i64 %m, i1 %c1, i1 %c2, i1 %c3, i1 %c4) {
entry:
br i1 %c1, label %l1, label %l4

l1:
%phi1 = phi i64 [ %n, %entry ], [ %phi2, %l2 ]
%add = add i64 %phi1, 1
%div = sdiv i64 %add, 2
br i1 %c2, label %l1_5, label %exit

l1_5:
br i1 %c3, label %l2, label %l3

l2:
%phi2 = phi i64 [ %phi1, %l1_5 ], [ %phi3, %l3 ]
br label %l1

l3:
%phi3 = phi i64 [ %phi1, %l1_5 ], [ %m, %l4 ]
br i1 %c2, label %l4, label %l2

l4:
%phi4 = phi i64 [ %n, %entry ], [ %phi3, %l3 ]
%sub = sub i64 %phi4, 1
%mul = mul i64 %sub, 2
br i1 %c4, label %l3, label %exit

exit:
%res = phi i64 [ %div, %l1 ], [ %mul, %l4]
ret i64 %res
}