-
Notifications
You must be signed in to change notification settings - Fork 12.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SandboxVec][BottomUpVec] Use SeedCollector and slice seeds #120826
Conversation
@llvm/pr-subscribers-vectorizers Author: vporpo (vporpo) Changes: With this patch we switch from the temporary dummy seeds to actual seeds provided by the seed collector. Full diff: https://github.com/llvm/llvm-project/pull/120826.diff 13 Files Affected:
diff --git a/llvm/include/llvm/SandboxIR/Pass.h b/llvm/include/llvm/SandboxIR/Pass.h
index 4f4eae87cd3ff7..267389a8a87a2e 100644
--- a/llvm/include/llvm/SandboxIR/Pass.h
+++ b/llvm/include/llvm/SandboxIR/Pass.h
@@ -16,6 +16,7 @@ namespace llvm {
class AAResults;
class ScalarEvolution;
+class TargetTransformInfo;
namespace sandboxir {
@@ -25,15 +26,18 @@ class Region;
class Analyses {
AAResults *AA = nullptr;
ScalarEvolution *SE = nullptr;
+ TargetTransformInfo *TTI = nullptr; // Null unless set by the public constructor.
Analyses() = default; // Private: only reachable through emptyForTesting().
public:
- Analyses(AAResults &AA, ScalarEvolution &SE) : AA(&AA), SE(&SE) {}
+ Analyses(AAResults &AA, ScalarEvolution &SE, TargetTransformInfo &TTI)
+ : AA(&AA), SE(&SE), TTI(&TTI) {}
public:
AAResults &getAA() const { return *AA; }
ScalarEvolution &getScalarEvolution() const { return *SE; }
+ TargetTransformInfo &getTTI() const { return *TTI; } // Dereferences TTI without a null check.
/// For use by unit tests. All analysis pointers are left null, so none of
/// the getters above may be called on the returned object.
static Analyses emptyForTesting() { return Analyses(); }
};
diff --git a/llvm/include/llvm/SandboxIR/Utils.h b/llvm/include/llvm/SandboxIR/Utils.h
index a73498adea1d59..d58fe522143953 100644
--- a/llvm/include/llvm/SandboxIR/Utils.h
+++ b/llvm/include/llvm/SandboxIR/Utils.h
@@ -60,11 +60,16 @@ class Utils {
getUnderlyingObject(LSI->getPointerOperand()->Val));
}
+ /// \Returns the size in bits of \p Ty, as reported by \p DL.
+ static unsigned getNumBits(Type *Ty, const DataLayout &DL) {
+ return DL.getTypeSizeInBits(Ty->LLVMTy); // Query the underlying LLVM type.
+ }
+
/// \Returns the number of bits required to represent the operands or return
/// value of \p V in \p DL.
static unsigned getNumBits(Value *V, const DataLayout &DL) {
Type *Ty = getExpectedType(V);
- return DL.getTypeSizeInBits(Ty->LLVMTy);
+ return getNumBits(Ty, DL);
}
/// \Returns the number of bits required to represent the operands or
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h
index 63d6ef31c86453..233cf82a1b3dfb 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h
@@ -177,6 +177,7 @@ class LegalityAnalysis {
// TODO: Try to remove the SkipScheduling argument by refactoring the tests.
const LegalityResult &canVectorize(ArrayRef<Value *> Bndl,
bool SkipScheduling = false);
+ void clear() { Sched.clear(); }
};
} // namespace llvm::sandboxir
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h
index 3959f84c601e04..1e8c0101cf77cd 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h
@@ -143,6 +143,13 @@ class Scheduler {
~Scheduler() {}
bool trySchedule(ArrayRef<Instruction *> Instrs);
+ /// Clear the scheduler's state: the bundles, the DAG and the schedule-top
+ /// iterator.
+ void clear() {
+ Bndls.clear();
+ // TODO: clear view once it lands.
+ DAG.clear();
+ ScheduleTopItOpt = std::nullopt; // Back to "no value".
+ }
#ifndef NDEBUG
void dump(raw_ostream &OS) const;
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h
index 6e16a84d832e5e..73b2bdf8f181f6 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h
@@ -95,8 +95,8 @@ class SeedBundle {
/// with a total size <= \p MaxVecRegBits, or an empty slice if the
/// requirements cannot be met. If \p ForcePowOf2 is true, then the returned
/// slice will have a total number of bits that is a power of 2.
- MutableArrayRef<Instruction *>
- getSlice(unsigned StartIdx, unsigned MaxVecRegBits, bool ForcePowOf2);
+ ArrayRef<Instruction *> getSlice(unsigned StartIdx, unsigned MaxVecRegBits,
+ bool ForcePowOf2);
/// \Returns the number of seed elements in the bundle.
std::size_t size() const { return Seeds.size(); }
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h
index fc9d67fcfcdec4..519777cf87d614 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h
@@ -133,6 +133,16 @@ class VecUtils {
assert(tryGetCommonScalarType(Bndl) && "Expected common scalar type!");
return ScalarTy;
}
+ /// \Returns the largest power of 2 that is <= \p Num, or 0 if \p Num is 0.
+ static unsigned getFloorPowerOf2(unsigned Num) {
+ if (Num == 0)
+ return Num;
+ unsigned Mask = Num;
+ Mask >>= 1; // Mask's highest set bit is now one position below Num's.
+ for (int ShiftBy = 1; ShiftBy < 32; ShiftBy <<= 1) // Shift by 1, 2, 4, 8, 16.
+ Mask |= Mask >> ShiftBy; // Smear the highest set bit into all lower bits.
+ return Num & ~Mask; // Only Num's highest set bit survives.
+ }
};
} // namespace llvm::sandboxir
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
index a2ea11be59b8ed..18e072c17d202b 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
@@ -8,29 +8,31 @@
#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/SandboxIR/Function.h"
#include "llvm/SandboxIR/Instruction.h"
#include "llvm/SandboxIR/Module.h"
#include "llvm/SandboxIR/Utils.h"
#include "llvm/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizerPassBuilder.h"
+#include "llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h"
#include "llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h"
-namespace llvm::sandboxir {
+namespace llvm {
+
+static cl::opt<unsigned>
+ OverrideVecRegBits("sbvec-vec-reg-bits", cl::init(0), cl::Hidden,
+ cl::desc("Override the vector register size in bits, "
+ "which is otherwise found by querying TTI."));
+static cl::opt<bool>
+ AllowNonPow2("sbvec-allow-non-pow2", cl::init(false), cl::Hidden,
+ cl::desc("Allow non-power-of-2 vectorization."));
+
+namespace sandboxir {
BottomUpVec::BottomUpVec(StringRef Pipeline)
: FunctionPass("bottom-up-vec"),
RPM("rpm", Pipeline, SandboxVectorizerPassBuilder::createRegionPass) {}
-// TODO: This is a temporary function that returns some seeds.
-// Replace this with SeedCollector's function when it lands.
-static llvm::SmallVector<Value *, 4> collectSeeds(BasicBlock &BB) {
- llvm::SmallVector<Value *, 4> Seeds;
- for (auto &I : BB)
- if (auto *SI = llvm::dyn_cast<StoreInst>(&I))
- Seeds.push_back(SI);
- return Seeds;
-}
-
static SmallVector<Value *, 4> getOperand(ArrayRef<Value *> Bndl,
unsigned OpIdx) {
SmallVector<Value *, 4> Operands;
@@ -265,6 +267,7 @@ Value *BottomUpVec::vectorizeRec(ArrayRef<Value *> Bndl, unsigned Depth) {
bool BottomUpVec::tryVectorize(ArrayRef<Value *> Bndl) {
DeadInstrCandidates.clear();
+ Legality->clear();
vectorizeRec(Bndl, /*Depth=*/0);
tryEraseDeadInstrs();
return Change;
@@ -275,17 +278,67 @@ bool BottomUpVec::runOnFunction(Function &F, const Analyses &A) {
A.getAA(), A.getScalarEvolution(), F.getParent()->getDataLayout(),
F.getContext());
Change = false;
+ const auto &DL = F.getParent()->getDataLayout();
+ unsigned VecRegBits =
+ OverrideVecRegBits != 0
+ ? OverrideVecRegBits
+ : A.getTTI()
+ .getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector)
+ .getFixedValue();
+
// TODO: Start from innermost BBs first
for (auto &BB : F) {
- // TODO: Replace with proper SeedCollector function.
- auto Seeds = collectSeeds(BB);
- // TODO: Slice Seeds into smaller chunks.
- // TODO: If vectorization succeeds, run the RegionPassManager on the
- // resulting region.
- if (Seeds.size() >= 2)
- Change |= tryVectorize(Seeds);
+ SeedCollector SC(&BB, A.getScalarEvolution());
+ for (SeedBundle &Seeds : SC.getStoreSeeds()) {
+ unsigned ElmBits =
+ Utils::getNumBits(VecUtils::getElementType(Utils::getExpectedType(
+ Seeds[Seeds.getFirstUnusedElementIdx()])),
+ DL);
+
+ auto DivideBy2 = [](unsigned Num) {
+ auto Floor = VecUtils::getFloorPowerOf2(Num);
+ if (Floor == Num)
+ return Floor / 2;
+ return Floor;
+ };
+ // Try to create the largest vector supported by the target. If it fails
+ // reduce the vector size by half.
+ for (unsigned SliceElms = std::min(VecRegBits / ElmBits,
+ Seeds.getNumUnusedBits() / ElmBits);
+ SliceElms >= 2u; SliceElms = DivideBy2(SliceElms)) {
+ if (Seeds.allUsed())
+ break;
+ // Keep trying offsets after FirstUnusedElementIdx, until we vectorize
+ // the slice. This could be quite expensive. NOTE(review): no limit is
+ // enforced in this loop itself; confirm one is applied elsewhere.
+ for (unsigned Offset = Seeds.getFirstUnusedElementIdx(),
+ OE = Seeds.size();
+ Offset + 1 < OE; Offset += 1) {
+ // Seeds are getting used as we vectorize, so skip them.
+ if (Seeds.isUsed(Offset))
+ continue;
+ if (Seeds.allUsed())
+ break;
+
+ auto SeedSlice =
+ Seeds.getSlice(Offset, SliceElms * ElmBits, !AllowNonPow2);
+ if (SeedSlice.empty())
+ continue;
+
+ assert(SeedSlice.size() >= 2 && "Should have been rejected!");
+
+ // TODO: If vectorization succeeds, run the RegionPassManager on the
+ // resulting region.
+
+ // TODO: Refactor to remove the unnecessary copy to SeedSliceVals.
+ SmallVector<Value *> SeedSliceVals(SeedSlice.begin(),
+ SeedSlice.end());
+ Change |= tryVectorize(SeedSliceVals);
+ }
+ }
+ }
}
return Change;
}
-} // namespace llvm::sandboxir
+} // namespace sandboxir
+} // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp
index c22eb01d74a1cb..a6e2b40000529a 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp
@@ -86,6 +86,6 @@ bool SandboxVectorizerPass::runImpl(Function &LLVMF) {
// Create SandboxIR for LLVMF and run BottomUpVec on it.
sandboxir::Function &F = *Ctx->createFunction(&LLVMF);
- sandboxir::Analyses A(*AA, *SE);
+ sandboxir::Analyses A(*AA, *SE, *TTI);
return FPM.runOnFunction(F, A);
}
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp
index 6ea34c5e0598df..a3ce663407c4a9 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp
@@ -31,9 +31,9 @@ cl::opt<unsigned> SeedGroupsLimit(
cl::desc("Limit the number of collected seeds groups in a BB to "
"cap compilation time."));
-MutableArrayRef<Instruction *> SeedBundle::getSlice(unsigned StartIdx,
- unsigned MaxVecRegBits,
- bool ForcePowerOf2) {
+ArrayRef<Instruction *> SeedBundle::getSlice(unsigned StartIdx,
+ unsigned MaxVecRegBits,
+ bool ForcePowerOf2) {
// Use uint32_t here for compatibility with IsPowerOf2_32
// BitCount tracks the size of the working slice. From that we can tell
@@ -47,10 +47,13 @@ MutableArrayRef<Instruction *> SeedBundle::getSlice(unsigned StartIdx,
// Can't start a slice with a used instruction.
assert(!isUsed(StartIdx) && "Expected unused at StartIdx");
for (auto S : make_range(Seeds.begin() + StartIdx, Seeds.end())) {
+ // Stop if this instruction is used. This needs to be done before
+ // getNumBits() because a "used" instruction may have been erased.
+ if (isUsed(StartIdx + NumElements))
+ break;
uint32_t InstBits = Utils::getNumBits(S);
- // Stop if this instruction is used, or if adding it puts the slice over
- // the limit.
- if (isUsed(StartIdx + NumElements) || BitCount + InstBits > MaxVecRegBits)
+ // Stop if adding it puts the slice over the limit.
+ if (BitCount + InstBits > MaxVecRegBits)
break;
NumElements++;
BitCount += InstBits;
@@ -68,7 +71,7 @@ MutableArrayRef<Instruction *> SeedBundle::getSlice(unsigned StartIdx,
"Must be a power of two");
// Return any non-empty slice
if (NumElements > 1)
- return MutableArrayRef<Instruction *>(&Seeds[StartIdx], NumElements);
+ return ArrayRef<Instruction *>(&Seeds[StartIdx], NumElements);
else
return {};
}
diff --git a/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll b/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
index 7422d287ff3e2a..785d1f4ef666fc 100644
--- a/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
+++ b/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -passes=sandbox-vectorizer -sbvec-passes="bottom-up-vec<>" %s -S | FileCheck %s
+; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="bottom-up-vec<>" %s -S | FileCheck %s
define void @store_load(ptr %ptr) {
; CHECK-LABEL: define void @store_load(
diff --git a/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice.ll b/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice.ll
new file mode 100644
index 00000000000000..46cda3c80aaa35
--- /dev/null
+++ b/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="bottom-up-vec<>" %s -S | FileCheck %s
+
+
+declare void @foo()
+define void @slice_seeds(ptr %ptr, float %val) {
+; CHECK-LABEL: define void @slice_seeds(
+; CHECK-SAME: ptr [[PTR:%.*]], float [[VAL:%.*]]) {
+; CHECK-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
+; CHECK-NEXT: [[PTR1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
+; CHECK-NEXT: [[PTR2:%.*]] = getelementptr float, ptr [[PTR]], i32 2
+; CHECK-NEXT: [[LD2:%.*]] = load float, ptr [[PTR2]], align 4
+; CHECK-NEXT: store float [[LD2]], ptr [[PTR2]], align 4
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
+; CHECK-NEXT: store <2 x float> [[VECL]], ptr [[PTR0]], align 4
+; CHECK-NEXT: ret void
+;
+ %ptr0 = getelementptr float, ptr %ptr, i32 0
+ %ptr1 = getelementptr float, ptr %ptr, i32 1
+ %ptr2 = getelementptr float, ptr %ptr, i32 2
+
+ %ld2 = load float, ptr %ptr2
+ store float %ld2, ptr %ptr2
+ ; This call blocks scheduling of all 3 stores.
+ call void @foo()
+
+ %ld0 = load float, ptr %ptr0
+ %ld1 = load float, ptr %ptr1
+ store float %ld0, ptr %ptr0
+ store float %ld1, ptr %ptr1
+ ret void
+}
diff --git a/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice_pow2.ll b/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice_pow2.ll
new file mode 100644
index 00000000000000..22119c4491b929
--- /dev/null
+++ b/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice_pow2.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2=false -sbvec-passes="bottom-up-vec<>" %s -S | FileCheck %s --check-prefix=POW2
+; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2=true -sbvec-passes="bottom-up-vec<>" %s -S | FileCheck %s --check-prefix=NON-POW2
+
+define void @pow2(ptr %ptr, float %val) {
+; POW2-LABEL: define void @pow2(
+; POW2-SAME: ptr [[PTR:%.*]], float [[VAL:%.*]]) {
+; POW2-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
+; POW2-NEXT: [[PTR1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
+; POW2-NEXT: [[PTR2:%.*]] = getelementptr float, ptr [[PTR]], i32 2
+; POW2-NEXT: [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
+; POW2-NEXT: [[LD2:%.*]] = load float, ptr [[PTR2]], align 4
+; POW2-NEXT: store <2 x float> [[VECL]], ptr [[PTR0]], align 4
+; POW2-NEXT: store float [[LD2]], ptr [[PTR2]], align 4
+; POW2-NEXT: ret void
+;
+; NON-POW2-LABEL: define void @pow2(
+; NON-POW2-SAME: ptr [[PTR:%.*]], float [[VAL:%.*]]) {
+; NON-POW2-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
+; NON-POW2-NEXT: [[PTR1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
+; NON-POW2-NEXT: [[PTR2:%.*]] = getelementptr float, ptr [[PTR]], i32 2
+; NON-POW2-NEXT: [[PACK2:%.*]] = load <3 x float>, ptr [[PTR0]], align 4
+; NON-POW2-NEXT: store <3 x float> [[PACK2]], ptr [[PTR0]], align 4
+; NON-POW2-NEXT: ret void
+;
+ %ptr0 = getelementptr float, ptr %ptr, i32 0
+ %ptr1 = getelementptr float, ptr %ptr, i32 1
+ %ptr2 = getelementptr float, ptr %ptr, i32 2
+
+ %ld0 = load float, ptr %ptr0
+ %ld1 = load float, ptr %ptr1
+ %ld2 = load float, ptr %ptr2
+ store float %ld0, ptr %ptr0
+ store float %ld1, ptr %ptr1
+ store float %ld2, ptr %ptr2
+ ret void
+}
diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp
index cf7b6cbc7e55cb..8661dcd5067c0a 100644
--- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp
@@ -472,3 +472,14 @@ define void @foo(i8 %v, ptr %ptr) {
#endif // NDEBUG
}
}
+
+TEST_F(VecUtilsTest, FloorPowerOf2) { // Covers zero, exact powers of 2 and values in between.
+ EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(0), 0u); // Zero maps to zero.
+ EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(1 << 0), 1u << 0);
+ EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(3), 2u);
+ EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(4), 4u); // Exact powers of 2 are returned unchanged.
+ EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(5), 4u);
+ EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(7), 4u);
+ EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(8), 8u);
+ EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(9), 8u);
+}
|
@llvm/pr-subscribers-llvm-transforms Author: vporpo (vporpo) Changes: With this patch we switch from the temporary dummy seeds to actual seeds provided by the seed collector. Full diff: https://github.com/llvm/llvm-project/pull/120826.diff 13 Files Affected:
diff --git a/llvm/include/llvm/SandboxIR/Pass.h b/llvm/include/llvm/SandboxIR/Pass.h
index 4f4eae87cd3ff7..267389a8a87a2e 100644
--- a/llvm/include/llvm/SandboxIR/Pass.h
+++ b/llvm/include/llvm/SandboxIR/Pass.h
@@ -16,6 +16,7 @@ namespace llvm {
class AAResults;
class ScalarEvolution;
+class TargetTransformInfo;
namespace sandboxir {
@@ -25,15 +26,18 @@ class Region;
class Analyses {
AAResults *AA = nullptr;
ScalarEvolution *SE = nullptr;
+ TargetTransformInfo *TTI = nullptr; // Null unless set by the public constructor.
Analyses() = default; // Private: only reachable through emptyForTesting().
public:
- Analyses(AAResults &AA, ScalarEvolution &SE) : AA(&AA), SE(&SE) {}
+ Analyses(AAResults &AA, ScalarEvolution &SE, TargetTransformInfo &TTI)
+ : AA(&AA), SE(&SE), TTI(&TTI) {}
public:
AAResults &getAA() const { return *AA; }
ScalarEvolution &getScalarEvolution() const { return *SE; }
+ TargetTransformInfo &getTTI() const { return *TTI; } // Dereferences TTI without a null check.
/// For use by unit tests. All analysis pointers are left null, so none of
/// the getters above may be called on the returned object.
static Analyses emptyForTesting() { return Analyses(); }
};
diff --git a/llvm/include/llvm/SandboxIR/Utils.h b/llvm/include/llvm/SandboxIR/Utils.h
index a73498adea1d59..d58fe522143953 100644
--- a/llvm/include/llvm/SandboxIR/Utils.h
+++ b/llvm/include/llvm/SandboxIR/Utils.h
@@ -60,11 +60,16 @@ class Utils {
getUnderlyingObject(LSI->getPointerOperand()->Val));
}
+ /// \Returns the size in bits of \p Ty, as reported by \p DL.
+ static unsigned getNumBits(Type *Ty, const DataLayout &DL) {
+ return DL.getTypeSizeInBits(Ty->LLVMTy); // Query the underlying LLVM type.
+ }
+
/// \Returns the number of bits required to represent the operands or return
/// value of \p V in \p DL.
static unsigned getNumBits(Value *V, const DataLayout &DL) {
Type *Ty = getExpectedType(V);
- return DL.getTypeSizeInBits(Ty->LLVMTy);
+ return getNumBits(Ty, DL);
}
/// \Returns the number of bits required to represent the operands or
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h
index 63d6ef31c86453..233cf82a1b3dfb 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h
@@ -177,6 +177,7 @@ class LegalityAnalysis {
// TODO: Try to remove the SkipScheduling argument by refactoring the tests.
const LegalityResult &canVectorize(ArrayRef<Value *> Bndl,
bool SkipScheduling = false);
+ void clear() { Sched.clear(); }
};
} // namespace llvm::sandboxir
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h
index 3959f84c601e04..1e8c0101cf77cd 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h
@@ -143,6 +143,13 @@ class Scheduler {
~Scheduler() {}
bool trySchedule(ArrayRef<Instruction *> Instrs);
+ /// Clear the scheduler's state: the bundles, the DAG and the schedule-top
+ /// iterator.
+ void clear() {
+ Bndls.clear();
+ // TODO: clear view once it lands.
+ DAG.clear();
+ ScheduleTopItOpt = std::nullopt; // Back to "no value".
+ }
#ifndef NDEBUG
void dump(raw_ostream &OS) const;
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h
index 6e16a84d832e5e..73b2bdf8f181f6 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h
@@ -95,8 +95,8 @@ class SeedBundle {
/// with a total size <= \p MaxVecRegBits, or an empty slice if the
/// requirements cannot be met. If \p ForcePowOf2 is true, then the returned
/// slice will have a total number of bits that is a power of 2.
- MutableArrayRef<Instruction *>
- getSlice(unsigned StartIdx, unsigned MaxVecRegBits, bool ForcePowOf2);
+ ArrayRef<Instruction *> getSlice(unsigned StartIdx, unsigned MaxVecRegBits,
+ bool ForcePowOf2);
/// \Returns the number of seed elements in the bundle.
std::size_t size() const { return Seeds.size(); }
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h
index fc9d67fcfcdec4..519777cf87d614 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h
@@ -133,6 +133,16 @@ class VecUtils {
assert(tryGetCommonScalarType(Bndl) && "Expected common scalar type!");
return ScalarTy;
}
+ /// \Returns the largest power of 2 that is <= \p Num, or 0 if \p Num is 0.
+ static unsigned getFloorPowerOf2(unsigned Num) {
+ if (Num == 0)
+ return Num;
+ unsigned Mask = Num;
+ Mask >>= 1; // Mask's highest set bit is now one position below Num's.
+ for (int ShiftBy = 1; ShiftBy < 32; ShiftBy <<= 1) // Shift by 1, 2, 4, 8, 16.
+ Mask |= Mask >> ShiftBy; // Smear the highest set bit into all lower bits.
+ return Num & ~Mask; // Only Num's highest set bit survives.
+ }
};
} // namespace llvm::sandboxir
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
index a2ea11be59b8ed..18e072c17d202b 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
@@ -8,29 +8,31 @@
#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/SandboxIR/Function.h"
#include "llvm/SandboxIR/Instruction.h"
#include "llvm/SandboxIR/Module.h"
#include "llvm/SandboxIR/Utils.h"
#include "llvm/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizerPassBuilder.h"
+#include "llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h"
#include "llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h"
-namespace llvm::sandboxir {
+namespace llvm {
+
+static cl::opt<unsigned>
+ OverrideVecRegBits("sbvec-vec-reg-bits", cl::init(0), cl::Hidden,
+ cl::desc("Override the vector register size in bits, "
+ "which is otherwise found by querying TTI."));
+static cl::opt<bool>
+ AllowNonPow2("sbvec-allow-non-pow2", cl::init(false), cl::Hidden,
+ cl::desc("Allow non-power-of-2 vectorization."));
+
+namespace sandboxir {
BottomUpVec::BottomUpVec(StringRef Pipeline)
: FunctionPass("bottom-up-vec"),
RPM("rpm", Pipeline, SandboxVectorizerPassBuilder::createRegionPass) {}
-// TODO: This is a temporary function that returns some seeds.
-// Replace this with SeedCollector's function when it lands.
-static llvm::SmallVector<Value *, 4> collectSeeds(BasicBlock &BB) {
- llvm::SmallVector<Value *, 4> Seeds;
- for (auto &I : BB)
- if (auto *SI = llvm::dyn_cast<StoreInst>(&I))
- Seeds.push_back(SI);
- return Seeds;
-}
-
static SmallVector<Value *, 4> getOperand(ArrayRef<Value *> Bndl,
unsigned OpIdx) {
SmallVector<Value *, 4> Operands;
@@ -265,6 +267,7 @@ Value *BottomUpVec::vectorizeRec(ArrayRef<Value *> Bndl, unsigned Depth) {
bool BottomUpVec::tryVectorize(ArrayRef<Value *> Bndl) {
DeadInstrCandidates.clear();
+ Legality->clear();
vectorizeRec(Bndl, /*Depth=*/0);
tryEraseDeadInstrs();
return Change;
@@ -275,17 +278,67 @@ bool BottomUpVec::runOnFunction(Function &F, const Analyses &A) {
A.getAA(), A.getScalarEvolution(), F.getParent()->getDataLayout(),
F.getContext());
Change = false;
+ const auto &DL = F.getParent()->getDataLayout();
+ unsigned VecRegBits =
+ OverrideVecRegBits != 0
+ ? OverrideVecRegBits
+ : A.getTTI()
+ .getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector)
+ .getFixedValue();
+
// TODO: Start from innermost BBs first
for (auto &BB : F) {
- // TODO: Replace with proper SeedCollector function.
- auto Seeds = collectSeeds(BB);
- // TODO: Slice Seeds into smaller chunks.
- // TODO: If vectorization succeeds, run the RegionPassManager on the
- // resulting region.
- if (Seeds.size() >= 2)
- Change |= tryVectorize(Seeds);
+ SeedCollector SC(&BB, A.getScalarEvolution());
+ for (SeedBundle &Seeds : SC.getStoreSeeds()) {
+ unsigned ElmBits =
+ Utils::getNumBits(VecUtils::getElementType(Utils::getExpectedType(
+ Seeds[Seeds.getFirstUnusedElementIdx()])),
+ DL);
+
+ auto DivideBy2 = [](unsigned Num) {
+ auto Floor = VecUtils::getFloorPowerOf2(Num);
+ if (Floor == Num)
+ return Floor / 2;
+ return Floor;
+ };
+ // Try to create the largest vector supported by the target. If it fails
+ // reduce the vector size by half.
+ for (unsigned SliceElms = std::min(VecRegBits / ElmBits,
+ Seeds.getNumUnusedBits() / ElmBits);
+ SliceElms >= 2u; SliceElms = DivideBy2(SliceElms)) {
+ if (Seeds.allUsed())
+ break;
+ // Keep trying offsets after FirstUnusedElementIdx, until we vectorize
+ // the slice. This could be quite expensive. NOTE(review): no limit is
+ // enforced in this loop itself; confirm one is applied elsewhere.
+ for (unsigned Offset = Seeds.getFirstUnusedElementIdx(),
+ OE = Seeds.size();
+ Offset + 1 < OE; Offset += 1) {
+ // Seeds are getting used as we vectorize, so skip them.
+ if (Seeds.isUsed(Offset))
+ continue;
+ if (Seeds.allUsed())
+ break;
+
+ auto SeedSlice =
+ Seeds.getSlice(Offset, SliceElms * ElmBits, !AllowNonPow2);
+ if (SeedSlice.empty())
+ continue;
+
+ assert(SeedSlice.size() >= 2 && "Should have been rejected!");
+
+ // TODO: If vectorization succeeds, run the RegionPassManager on the
+ // resulting region.
+
+ // TODO: Refactor to remove the unnecessary copy to SeedSliceVals.
+ SmallVector<Value *> SeedSliceVals(SeedSlice.begin(),
+ SeedSlice.end());
+ Change |= tryVectorize(SeedSliceVals);
+ }
+ }
+ }
}
return Change;
}
-} // namespace llvm::sandboxir
+} // namespace sandboxir
+} // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp
index c22eb01d74a1cb..a6e2b40000529a 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp
@@ -86,6 +86,6 @@ bool SandboxVectorizerPass::runImpl(Function &LLVMF) {
// Create SandboxIR for LLVMF and run BottomUpVec on it.
sandboxir::Function &F = *Ctx->createFunction(&LLVMF);
- sandboxir::Analyses A(*AA, *SE);
+ sandboxir::Analyses A(*AA, *SE, *TTI);
return FPM.runOnFunction(F, A);
}
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp
index 6ea34c5e0598df..a3ce663407c4a9 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SeedCollector.cpp
@@ -31,9 +31,9 @@ cl::opt<unsigned> SeedGroupsLimit(
cl::desc("Limit the number of collected seeds groups in a BB to "
"cap compilation time."));
-MutableArrayRef<Instruction *> SeedBundle::getSlice(unsigned StartIdx,
- unsigned MaxVecRegBits,
- bool ForcePowerOf2) {
+ArrayRef<Instruction *> SeedBundle::getSlice(unsigned StartIdx,
+ unsigned MaxVecRegBits,
+ bool ForcePowerOf2) {
// Use uint32_t here for compatibility with IsPowerOf2_32
// BitCount tracks the size of the working slice. From that we can tell
@@ -47,10 +47,13 @@ MutableArrayRef<Instruction *> SeedBundle::getSlice(unsigned StartIdx,
// Can't start a slice with a used instruction.
assert(!isUsed(StartIdx) && "Expected unused at StartIdx");
for (auto S : make_range(Seeds.begin() + StartIdx, Seeds.end())) {
+ // Stop if this instruction is used. This needs to be done before
+ // getNumBits() because a "used" instruction may have been erased.
+ if (isUsed(StartIdx + NumElements))
+ break;
uint32_t InstBits = Utils::getNumBits(S);
- // Stop if this instruction is used, or if adding it puts the slice over
- // the limit.
- if (isUsed(StartIdx + NumElements) || BitCount + InstBits > MaxVecRegBits)
+ // Stop if adding it puts the slice over the limit.
+ if (BitCount + InstBits > MaxVecRegBits)
break;
NumElements++;
BitCount += InstBits;
@@ -68,7 +71,7 @@ MutableArrayRef<Instruction *> SeedBundle::getSlice(unsigned StartIdx,
"Must be a power of two");
// Return any non-empty slice
if (NumElements > 1)
- return MutableArrayRef<Instruction *>(&Seeds[StartIdx], NumElements);
+ return ArrayRef<Instruction *>(&Seeds[StartIdx], NumElements);
else
return {};
}
diff --git a/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll b/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
index 7422d287ff3e2a..785d1f4ef666fc 100644
--- a/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
+++ b/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -passes=sandbox-vectorizer -sbvec-passes="bottom-up-vec<>" %s -S | FileCheck %s
+; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="bottom-up-vec<>" %s -S | FileCheck %s
define void @store_load(ptr %ptr) {
; CHECK-LABEL: define void @store_load(
diff --git a/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice.ll b/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice.ll
new file mode 100644
index 00000000000000..46cda3c80aaa35
--- /dev/null
+++ b/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="bottom-up-vec<>" %s -S | FileCheck %s
+
+
+declare void @foo()
+define void @slice_seeds(ptr %ptr, float %val) {
+; CHECK-LABEL: define void @slice_seeds(
+; CHECK-SAME: ptr [[PTR:%.*]], float [[VAL:%.*]]) {
+; CHECK-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
+; CHECK-NEXT: [[PTR1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
+; CHECK-NEXT: [[PTR2:%.*]] = getelementptr float, ptr [[PTR]], i32 2
+; CHECK-NEXT: [[LD2:%.*]] = load float, ptr [[PTR2]], align 4
+; CHECK-NEXT: store float [[LD2]], ptr [[PTR2]], align 4
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
+; CHECK-NEXT: store <2 x float> [[VECL]], ptr [[PTR0]], align 4
+; CHECK-NEXT: ret void
+;
+ %ptr0 = getelementptr float, ptr %ptr, i32 0
+ %ptr1 = getelementptr float, ptr %ptr, i32 1
+ %ptr2 = getelementptr float, ptr %ptr, i32 2
+
+ %ld2 = load float, ptr %ptr2
+ store float %ld2, ptr %ptr2
+ ; This call blocks scheduling of all 3 stores.
+ call void @foo()
+
+ %ld0 = load float, ptr %ptr0
+ %ld1 = load float, ptr %ptr1
+ store float %ld0, ptr %ptr0
+ store float %ld1, ptr %ptr1
+ ret void
+}
diff --git a/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice_pow2.ll b/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice_pow2.ll
new file mode 100644
index 00000000000000..22119c4491b929
--- /dev/null
+++ b/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice_pow2.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2=false -sbvec-passes="bottom-up-vec<>" %s -S | FileCheck %s --check-prefix=POW2
+; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2=true -sbvec-passes="bottom-up-vec<>" %s -S | FileCheck %s --check-prefix=NON-POW2
+
+define void @pow2(ptr %ptr, float %val) {
+; POW2-LABEL: define void @pow2(
+; POW2-SAME: ptr [[PTR:%.*]], float [[VAL:%.*]]) {
+; POW2-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
+; POW2-NEXT: [[PTR1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
+; POW2-NEXT: [[PTR2:%.*]] = getelementptr float, ptr [[PTR]], i32 2
+; POW2-NEXT: [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
+; POW2-NEXT: [[LD2:%.*]] = load float, ptr [[PTR2]], align 4
+; POW2-NEXT: store <2 x float> [[VECL]], ptr [[PTR0]], align 4
+; POW2-NEXT: store float [[LD2]], ptr [[PTR2]], align 4
+; POW2-NEXT: ret void
+;
+; NON-POW2-LABEL: define void @pow2(
+; NON-POW2-SAME: ptr [[PTR:%.*]], float [[VAL:%.*]]) {
+; NON-POW2-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
+; NON-POW2-NEXT: [[PTR1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
+; NON-POW2-NEXT: [[PTR2:%.*]] = getelementptr float, ptr [[PTR]], i32 2
+; NON-POW2-NEXT: [[PACK2:%.*]] = load <3 x float>, ptr [[PTR0]], align 4
+; NON-POW2-NEXT: store <3 x float> [[PACK2]], ptr [[PTR0]], align 4
+; NON-POW2-NEXT: ret void
+;
+ %ptr0 = getelementptr float, ptr %ptr, i32 0
+ %ptr1 = getelementptr float, ptr %ptr, i32 1
+ %ptr2 = getelementptr float, ptr %ptr, i32 2
+
+ %ld0 = load float, ptr %ptr0
+ %ld1 = load float, ptr %ptr1
+ %ld2 = load float, ptr %ptr2
+ store float %ld0, ptr %ptr0
+ store float %ld1, ptr %ptr1
+ store float %ld2, ptr %ptr2
+ ret void
+}
diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp
index cf7b6cbc7e55cb..8661dcd5067c0a 100644
--- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp
@@ -472,3 +472,14 @@ define void @foo(i8 %v, ptr %ptr) {
#endif // NDEBUG
}
}
+
+TEST_F(VecUtilsTest, FloorPowerOf2) {
+ EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(0), 0u);
+ EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(1 << 0), 1u << 0);
+ EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(3), 2u);
+ EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(4), 4u);
+ EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(5), 4u);
+ EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(7), 4u);
+ EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(8), 8u);
+ EXPECT_EQ(sandboxir::VecUtils::getFloorPowerOf2(9), 8u);
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks good to me.
return Num; | ||
unsigned Mask = Num; | ||
Mask >>= 1; | ||
for (int ShiftBy = 1; ShiftBy < 32; ShiftBy <<= 1) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: Should 32 here depend on `sizeof(unsigned)` instead?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
With this patch we switch from the temporary dummy seeds to actual seeds provided by the seed collector.
The seeds get sliced and each slice is used as the starting point for vectorization.