-
Notifications
You must be signed in to change notification settings - Fork 12.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[CodeLayout][NFC] Using MergedVector to avoid extra vector allocations #68724
Conversation
@llvm/pr-subscribers-llvm-transforms Author: None (spupyrev) ChangesUsing a wrapper (MergedVector) around vectors to avoid extra vector allocations. Full diff: https://github.com/llvm/llvm-project/pull/68724.diff 1 Files Affected:
diff --git a/llvm/lib/Transforms/Utils/CodeLayout.cpp b/llvm/lib/Transforms/Utils/CodeLayout.cpp
index 057a5e86c04aca1..2a023d1244fcb76 100644
--- a/llvm/lib/Transforms/Utils/CodeLayout.cpp
+++ b/llvm/lib/Transforms/Utils/CodeLayout.cpp
@@ -99,7 +99,7 @@ static cl::opt<unsigned> BackwardDistance(
cl::desc("The maximum distance (in bytes) of a backward jump for ExtTSP"));
// The maximum size of a chain created by the algorithm. The size is bounded
-// so that the algorithm can efficiently process extremely large instance.
+// so that the algorithm can efficiently process extremely large instances.
static cl::opt<unsigned>
MaxChainSize("ext-tsp-max-chain-size", cl::ReallyHidden, cl::init(4096),
cl::desc("The maximum size of a chain to create."));
@@ -217,8 +217,8 @@ struct NodeT {
NodeT &operator=(const NodeT &) = delete;
NodeT &operator=(NodeT &&) = default;
- explicit NodeT(size_t Index, uint64_t Size, uint64_t EC)
- : Index(Index), Size(Size), ExecutionCount(EC) {}
+ explicit NodeT(size_t Index, uint64_t Size, uint64_t Count)
+ : Index(Index), Size(Size), ExecutionCount(Count) {}
bool isEntry() const { return Index == 0; }
@@ -475,14 +475,14 @@ void ChainT::mergeEdges(ChainT *Other) {
}
}
-using NodeIter = std::vector<NodeT *>::const_iterator;
+/// A wrapper around three concatenated vectors (chains) of objects; it is used
+/// to avoid extra instantiation of the vectors.
+template <typename ObjType> struct MergedVector {
+ using ObjIter = typename std::vector<ObjType *>::const_iterator;
-/// A wrapper around three chains of nodes; it is used to avoid extra
-/// instantiation of the vectors.
-struct MergedChain {
- MergedChain(NodeIter Begin1, NodeIter End1, NodeIter Begin2 = NodeIter(),
- NodeIter End2 = NodeIter(), NodeIter Begin3 = NodeIter(),
- NodeIter End3 = NodeIter())
+ MergedVector(ObjIter Begin1, ObjIter End1, ObjIter Begin2 = ObjIter(),
+ ObjIter End2 = ObjIter(), ObjIter Begin3 = ObjIter(),
+ ObjIter End3 = ObjIter())
: Begin1(Begin1), End1(End1), Begin2(Begin2), End2(End2), Begin3(Begin3),
End3(End3) {}
@@ -507,13 +507,26 @@ struct MergedChain {
const NodeT *getFirstNode() const { return *Begin1; }
+ bool empty() const { return Begin1 == End1; }
+
+ void append(ObjIter Begin, ObjIter End) {
+ if (Begin2 == End2) {
+ Begin2 = Begin;
+ End2 = End;
+ return;
+ }
+ assert(Begin3 == End3 && "cannot extend MergedVector");
+ Begin3 = Begin;
+ End3 = End;
+ }
+
private:
- NodeIter Begin1;
- NodeIter End1;
- NodeIter Begin2;
- NodeIter End2;
- NodeIter Begin3;
- NodeIter End3;
+ ObjIter Begin1;
+ ObjIter End1;
+ ObjIter Begin2;
+ ObjIter End2;
+ ObjIter Begin3;
+ ObjIter End3;
};
/// Merge two chains of nodes respecting a given 'type' and 'offset'.
@@ -521,29 +534,29 @@ struct MergedChain {
/// If MergeType == 0, then the result is a concatenation of two chains.
/// Otherwise, the first chain is cut into two sub-chains at the offset,
/// and merged using all possible ways of concatenating three chains.
-MergedChain mergeNodes(const std::vector<NodeT *> &X,
- const std::vector<NodeT *> &Y, size_t MergeOffset,
- MergeTypeT MergeType) {
+MergedVector<NodeT> mergeNodes(const std::vector<NodeT *> &X,
+ const std::vector<NodeT *> &Y,
+ size_t MergeOffset, MergeTypeT MergeType) {
// Split the first chain, X, into X1 and X2.
- NodeIter BeginX1 = X.begin();
- NodeIter EndX1 = X.begin() + MergeOffset;
- NodeIter BeginX2 = X.begin() + MergeOffset;
- NodeIter EndX2 = X.end();
- NodeIter BeginY = Y.begin();
- NodeIter EndY = Y.end();
+ MergedVector<NodeT>::ObjIter BeginX1 = X.begin();
+ MergedVector<NodeT>::ObjIter EndX1 = X.begin() + MergeOffset;
+ MergedVector<NodeT>::ObjIter BeginX2 = X.begin() + MergeOffset;
+ MergedVector<NodeT>::ObjIter EndX2 = X.end();
+ MergedVector<NodeT>::ObjIter BeginY = Y.begin();
+ MergedVector<NodeT>::ObjIter EndY = Y.end();
// Construct a new chain from the three existing ones.
switch (MergeType) {
case MergeTypeT::X_Y:
- return MergedChain(BeginX1, EndX2, BeginY, EndY);
+ return MergedVector<NodeT>(BeginX1, EndX2, BeginY, EndY);
case MergeTypeT::Y_X:
- return MergedChain(BeginY, EndY, BeginX1, EndX2);
+ return MergedVector<NodeT>(BeginY, EndY, BeginX1, EndX2);
case MergeTypeT::X1_Y_X2:
- return MergedChain(BeginX1, EndX1, BeginY, EndY, BeginX2, EndX2);
+ return MergedVector<NodeT>(BeginX1, EndX1, BeginY, EndY, BeginX2, EndX2);
case MergeTypeT::Y_X2_X1:
- return MergedChain(BeginY, EndY, BeginX2, EndX2, BeginX1, EndX1);
+ return MergedVector<NodeT>(BeginY, EndY, BeginX2, EndX2, BeginX1, EndX1);
case MergeTypeT::X2_X1_Y:
- return MergedChain(BeginX2, EndX2, BeginX1, EndX1, BeginY, EndY);
+ return MergedVector<NodeT>(BeginX2, EndX2, BeginX1, EndX1, BeginY, EndY);
}
llvm_unreachable("unexpected chain merge type");
}
@@ -618,6 +631,7 @@ class ExtTSPImpl {
AllChains.reserve(NumNodes);
HotChains.reserve(NumNodes);
for (NodeT &Node : AllNodes) {
+ // Create a chain.
AllChains.emplace_back(Node.Index, &Node);
Node.CurChain = &AllChains.back();
if (Node.ExecutionCount > 0)
@@ -630,13 +644,13 @@ class ExtTSPImpl {
for (JumpT *Jump : PredNode.OutJumps) {
NodeT *SuccNode = Jump->Target;
ChainEdge *CurEdge = PredNode.CurChain->getEdge(SuccNode->CurChain);
- // this edge is already present in the graph.
+ // This edge is already present in the graph.
if (CurEdge != nullptr) {
assert(SuccNode->CurChain->getEdge(PredNode.CurChain) != nullptr);
CurEdge->appendJump(Jump);
continue;
}
- // this is a new edge.
+ // This is a new edge.
AllEdges.emplace_back(Jump);
PredNode.CurChain->addEdge(SuccNode->CurChain, &AllEdges.back());
SuccNode->CurChain->addEdge(PredNode.CurChain, &AllEdges.back());
@@ -649,7 +663,7 @@ class ExtTSPImpl {
/// to B are from A. Such nodes should be adjacent in the optimal ordering;
/// the method finds and merges such pairs of nodes.
void mergeForcedPairs() {
- // Find fallthroughs based on edge weights.
+ // Find forced pairs of blocks.
for (NodeT &Node : AllNodes) {
if (SuccNodes[Node.Index].size() == 1 &&
PredNodes[SuccNodes[Node.Index][0]].size() == 1 &&
@@ -699,9 +713,7 @@ class ExtTSPImpl {
/// Deterministically compare pairs of chains.
auto compareChainPairs = [](const ChainT *A1, const ChainT *B1,
const ChainT *A2, const ChainT *B2) {
- if (A1 != A2)
- return A1->Id < A2->Id;
- return B1->Id < B2->Id;
+ return std::make_tuple(A1->Id, B1->Id) < std::make_tuple(A2->Id, B2->Id);
};
while (HotChains.size() > 1) {
@@ -769,24 +781,25 @@ class ExtTSPImpl {
}
/// Compute the Ext-TSP score for a given node order and a list of jumps.
- double extTSPScore(const MergedChain &MergedBlocks,
- const std::vector<JumpT *> &Jumps) const {
- if (Jumps.empty())
+ double extTSPScore(const MergedVector<NodeT> &Nodes,
+ const MergedVector<JumpT> &Jumps) const {
+ if (Jumps.empty() || Nodes.empty())
return 0.0;
+
uint64_t CurAddr = 0;
- MergedBlocks.forEach([&](const NodeT *Node) {
+ Nodes.forEach([&](const NodeT *Node) {
Node->EstimatedAddr = CurAddr;
CurAddr += Node->Size;
});
double Score = 0;
- for (JumpT *Jump : Jumps) {
+ Jumps.forEach([&](const JumpT *Jump) {
const NodeT *SrcBlock = Jump->Source;
const NodeT *DstBlock = Jump->Target;
Score += ::extTSPScore(SrcBlock->EstimatedAddr, SrcBlock->Size,
DstBlock->EstimatedAddr, Jump->ExecutionCount,
Jump->IsConditional);
- }
+ });
return Score;
}
@@ -798,17 +811,15 @@ class ExtTSPImpl {
/// element being the corresponding merging type.
MergeGainT getBestMergeGain(ChainT *ChainPred, ChainT *ChainSucc,
ChainEdge *Edge) const {
- if (Edge->hasCachedMergeGain(ChainPred, ChainSucc)) {
+ if (Edge->hasCachedMergeGain(ChainPred, ChainSucc))
return Edge->getCachedMergeGain(ChainPred, ChainSucc);
- }
// Precompute jumps between ChainPred and ChainSucc.
- auto Jumps = Edge->jumps();
- ChainEdge *EdgePP = ChainPred->getEdge(ChainPred);
- if (EdgePP != nullptr) {
- Jumps.insert(Jumps.end(), EdgePP->jumps().begin(), EdgePP->jumps().end());
- }
+ MergedVector<JumpT> Jumps(Edge->jumps().begin(), Edge->jumps().end());
assert(!Jumps.empty() && "trying to merge chains w/o jumps");
+ ChainEdge *EdgePP = ChainPred->getEdge(ChainPred);
+ if (EdgePP != nullptr)
+ Jumps.append(EdgePP->jumps().begin(), EdgePP->jumps().end());
// This object holds the best chosen gain of merging two chains.
MergeGainT Gain = MergeGainT();
@@ -875,19 +886,20 @@ class ExtTSPImpl {
///
/// The two chains are not modified in the method.
MergeGainT computeMergeGain(const ChainT *ChainPred, const ChainT *ChainSucc,
- const std::vector<JumpT *> &Jumps,
+ const MergedVector<JumpT> &Jumps,
size_t MergeOffset, MergeTypeT MergeType) const {
- auto MergedBlocks =
+ MergedVector<NodeT> MergedNodes =
mergeNodes(ChainPred->Nodes, ChainSucc->Nodes, MergeOffset, MergeType);
// Do not allow a merge that does not preserve the original entry point.
if ((ChainPred->isEntry() || ChainSucc->isEntry()) &&
- !MergedBlocks.getFirstNode()->isEntry())
+ !MergedNodes.getFirstNode()->isEntry())
return MergeGainT();
// The gain for the new chain.
- auto NewGainScore = extTSPScore(MergedBlocks, Jumps) - ChainPred->Score;
- return MergeGainT(NewGainScore, MergeOffset, MergeType);
+ double NewScore = extTSPScore(MergedNodes, Jumps);
+ double CurScore = ChainPred->Score;
+ return MergeGainT(NewScore - CurScore, MergeOffset, MergeType);
}
/// Merge chain From into chain Into, update the list of active chains,
@@ -897,7 +909,7 @@ class ExtTSPImpl {
assert(Into != From && "a chain cannot be merged with itself");
// Merge the nodes.
- MergedChain MergedNodes =
+ MergedVector<NodeT> MergedNodes =
mergeNodes(Into->Nodes, From->Nodes, MergeOffset, MergeType);
Into->merge(From, MergedNodes.getNodes());
@@ -908,8 +920,10 @@ class ExtTSPImpl {
// Update cached ext-tsp score for the new chain.
ChainEdge *SelfEdge = Into->getEdge(Into);
if (SelfEdge != nullptr) {
- MergedNodes = MergedChain(Into->Nodes.begin(), Into->Nodes.end());
- Into->Score = extTSPScore(MergedNodes, SelfEdge->jumps());
+ MergedNodes = MergedVector<NodeT>(Into->Nodes.begin(), Into->Nodes.end());
+ MergedVector<JumpT> MergedJumps(SelfEdge->jumps().begin(),
+ SelfEdge->jumps().end());
+ Into->Score = extTSPScore(MergedNodes, MergedJumps);
}
// Remove the chain from the list of active chains.
@@ -1255,7 +1269,7 @@ class CDSortImpl {
}
/// Compute the change of the distance locality after merging the chains.
- double distBasedLocalityGain(const MergedChain &MergedBlocks,
+ double distBasedLocalityGain(const MergedVector<NodeT> &MergedBlocks,
const std::vector<JumpT *> &Jumps) const {
if (Jumps.empty())
return 0.0;
@@ -1283,7 +1297,7 @@ class CDSortImpl {
assert(Into != From && "a chain cannot be merged with itself");
// Merge the nodes.
- MergedChain MergedNodes =
+ MergedVector<NodeT> MergedNodes =
mergeNodes(Into->Nodes, From->Nodes, MergeOffset, MergeType);
Into->merge(From, MergedNodes.getNodes());
|
if (EdgePP != nullptr) { | ||
Jumps.insert(Jumps.end(), EdgePP->jumps().begin(), EdgePP->jumps().end()); | ||
} | ||
MergedVector<JumpT> Jumps(Edge->jumps().begin(), Edge->jumps().end()); | ||
assert(!Jumps.empty() && "trying to merge chains w/o jumps"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Change or remove this assertion (the semantics of the Jumps object has changed)?
assert(!Jumps.empty() && "trying to merge chains w/o jumps"); | ||
ChainEdge *EdgePP = ChainPred->getEdge(ChainPred); | ||
if (EdgePP != nullptr) | ||
Jumps.append(EdgePP->jumps().begin(), EdgePP->jumps().end()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's not clear why MergedVector is used for Jumps. I think it's better to define a separate struct for it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am not sure i understand this suggestion, can you clarify?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
IIUC, Jumps
is more restricted than this MergedVector
type. For instance, it doesn't need MergedVector::mergeNodes
. The only function it uses is ForEach
. However, it uses one function that the other use case (chains of nodes) does not need (append
). So why don't we define a separate struct for it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Separated the two structs as suggested
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@rlavaee let me know if you think some other refactoring is needed here. Thanks
fb9d9a2
to
6f8c6ca
Compare
} | ||
|
||
private: | ||
JumpIter Begin1; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are Begin1
and End1
always going to be the beginning and ending of a single vector? If yes, can we simplify this further by storing pointers to two vectors:
std::array<std::vector<JumpT *>, 2> Jumps;
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Wouldn't std::array<std::vector<JumpT *>, 2>
instantiate the vector during construction? That's exactly what I want to avoid. Or did you mean std::array<std::pair<JumpIter, JumpIter>, 2>
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, I actually meant std::array<std::vector<JumpT *>*, 2>
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Changed to std::array<std::vector<JumpT *>*, 2>
as suggested.
I really hope that you find the new version cleaner/simpler because I am not sure about myself :)
|
||
// Precompute jumps between ChainPred and ChainSucc. | ||
auto Jumps = Edge->jumps(); | ||
MergedJumpsT Jumps(&Edge->jumps()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
One last change. It's unusual for structs to have private members. We can make JumpArray
public and then use default initializers.
MergedJumpsT Jumps ({&Edge->jumps(), EdgePP ? &EdgePP->jumps() : nullptr});
Then you don't need to define append anymore.
/// A wrapper around two concatenated vectors (chains) of jumps. | ||
struct MergedJumpsT { | ||
MergedJumpsT(const std::vector<JumpT *> *Jumps1, | ||
const std::vector<JumpT *> *Jumps2 = nullptr) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This default value is unused.
f528142
to
471bcab
Compare
Using a wrapper (MergedVector) around vectors to avoid extra vector allocations.
Plus a few edits in the comments.