Skip to content

Commit

Permalink
[Clang] Use poison as base for vector literals
Browse files Browse the repository at this point in the history
When constructing vectors from elements, use poison instead of
undef as the base value. These literals always initialize all
elements (padding the remainder with zero), so the choice of
base value does not affect semantics.
  • Loading branch information
nikic committed Dec 19, 2023
1 parent 18e1179 commit a3d2d34
Show file tree
Hide file tree
Showing 35 changed files with 508 additions and 508 deletions.
26 changes: 13 additions & 13 deletions clang/lib/CodeGen/CGExprScalar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1894,8 +1894,8 @@ Value *ScalarExprEmitter::VisitInitListExpr(InitListExpr *E) {
// initializer, since LLVM optimizers generally do not want to touch
// shuffles.
unsigned CurIdx = 0;
bool VIsUndefShuffle = false;
llvm::Value *V = llvm::UndefValue::get(VType);
bool VIsPoisonShuffle = false;
llvm::Value *V = llvm::PoisonValue::get(VType);
for (unsigned i = 0; i != NumInitElements; ++i) {
Expr *IE = E->getInit(i);
Value *Init = Visit(IE);
Expand All @@ -1915,16 +1915,16 @@ Value *ScalarExprEmitter::VisitInitListExpr(InitListExpr *E) {
llvm::ConstantInt *C = cast<llvm::ConstantInt>(EI->getIndexOperand());
Value *LHS = nullptr, *RHS = nullptr;
if (CurIdx == 0) {
// insert into undef -> shuffle (src, undef)
// insert into poison -> shuffle (src, poison)
// shufflemask must use an i32
Args.push_back(getAsInt32(C, CGF.Int32Ty));
Args.resize(ResElts, -1);

LHS = EI->getVectorOperand();
RHS = V;
VIsUndefShuffle = true;
} else if (VIsUndefShuffle) {
// insert into undefshuffle && size match -> shuffle (v, src)
VIsPoisonShuffle = true;
} else if (VIsPoisonShuffle) {
// insert into poison shuffle && size match -> shuffle (v, src)
llvm::ShuffleVectorInst *SVV = cast<llvm::ShuffleVectorInst>(V);
for (unsigned j = 0; j != CurIdx; ++j)
Args.push_back(getMaskElt(SVV, j, 0));
Expand All @@ -1933,7 +1933,7 @@ Value *ScalarExprEmitter::VisitInitListExpr(InitListExpr *E) {

LHS = cast<llvm::ShuffleVectorInst>(V)->getOperand(0);
RHS = EI->getVectorOperand();
VIsUndefShuffle = false;
VIsPoisonShuffle = false;
}
if (!Args.empty()) {
V = Builder.CreateShuffleVector(LHS, RHS, Args);
Expand All @@ -1944,7 +1944,7 @@ Value *ScalarExprEmitter::VisitInitListExpr(InitListExpr *E) {
}
V = Builder.CreateInsertElement(V, Init, Builder.getInt32(CurIdx),
"vecinit");
VIsUndefShuffle = false;
VIsPoisonShuffle = false;
++CurIdx;
continue;
}
Expand All @@ -1962,9 +1962,9 @@ Value *ScalarExprEmitter::VisitInitListExpr(InitListExpr *E) {

if (OpTy->getNumElements() == ResElts) {
for (unsigned j = 0; j != CurIdx; ++j) {
// If the current vector initializer is a shuffle with undef, merge
// If the current vector initializer is a shuffle with poison, merge
// this shuffle directly into it.
if (VIsUndefShuffle) {
if (VIsPoisonShuffle) {
Args.push_back(getMaskElt(cast<llvm::ShuffleVectorInst>(V), j, 0));
} else {
Args.push_back(j);
Expand All @@ -1974,7 +1974,7 @@ Value *ScalarExprEmitter::VisitInitListExpr(InitListExpr *E) {
Args.push_back(getMaskElt(SVI, j, Offset));
Args.resize(ResElts, -1);

if (VIsUndefShuffle)
if (VIsPoisonShuffle)
V = cast<llvm::ShuffleVectorInst>(V)->getOperand(0);

Init = SVOp;
Expand All @@ -1997,12 +1997,12 @@ Value *ScalarExprEmitter::VisitInitListExpr(InitListExpr *E) {
Args.resize(ResElts, -1);
}

// If V is undef, make sure it ends up on the RHS of the shuffle to aid
// If V is poison, make sure it ends up on the RHS of the shuffle to aid
// merging subsequent shuffles into this one.
if (CurIdx == 0)
std::swap(V, Init);
V = Builder.CreateShuffleVector(V, Init, Args, "vecinit");
VIsUndefShuffle = isa<llvm::UndefValue>(Init);
VIsPoisonShuffle = isa<llvm::PoisonValue>(Init);
CurIdx += InitElts;
}

Expand Down
20 changes: 10 additions & 10 deletions clang/test/CodeGen/PowerPC/ppc-emmintrin.c
Original file line number Diff line number Diff line change
Expand Up @@ -521,14 +521,14 @@ test_converts() {
// CHECK: sitofp i64 %{{[0-9a-zA-Z_.]+}} to double

// CHECK-LABEL: define available_externally <2 x i64> @_mm_cvtsi64_si128
// CHECK: %[[INS:[0-9a-zA-Z_.]+]] = insertelement <2 x i64> undef, i64 %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[INS:[0-9a-zA-Z_.]+]] = insertelement <2 x i64> poison, i64 %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: insertelement <2 x i64> %[[INS]], i64 0, i32 1

// CHECK-LABEL: define available_externally <2 x double> @_mm_cvtsi64x_sd
// CHECK: call <2 x double> @_mm_cvtsi64_sd(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}, i64 noundef %{{[0-9a-zA-Z_.]+}})

// CHECK-LABEL: define available_externally <2 x i64> @_mm_cvtsi64x_si128
// CHECK: %[[INS:[0-9a-zA-Z_.]+]] = insertelement <2 x i64> undef, i64 %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[INS:[0-9a-zA-Z_.]+]] = insertelement <2 x i64> poison, i64 %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: insertelement <2 x i64> %[[INS]], i64 0, i32 1

// CHECK-LABEL: define available_externally <2 x double> @_mm_cvtss_sd
Expand Down Expand Up @@ -906,35 +906,35 @@ test_set() {

// CHECK-LABEL: define available_externally <2 x i64> @_mm_set_epi16
// CHECK-COUNT-8: store i16 {{[0-9a-zA-Z_%.]+}}, ptr {{[0-9a-zA-Z_%.]+}}, align 2
// CHECK: insertelement <8 x i16> undef, i16 {{[0-9a-zA-Z_%.]+}}, i32 0
// CHECK: insertelement <8 x i16> poison, i16 {{[0-9a-zA-Z_%.]+}}, i32 0
// CHECK-COUNT-7: insertelement <8 x i16> {{[0-9a-zA-Z_%.]+}}, i16 {{[0-9a-zA-Z_%.]+}}, i32 {{[1-7]}}

// CHECK-LABEL: define available_externally <2 x i64> @_mm_set_epi32
// CHECK-COUNT-4: store i32 {{[0-9a-zA-Z_%.]+}}, ptr {{[0-9a-zA-Z_%.]+}}, align 4
// CHECK: insertelement <4 x i32> undef, i32 {{[0-9a-zA-Z_%.]+}}, i32 0
// CHECK: insertelement <4 x i32> poison, i32 {{[0-9a-zA-Z_%.]+}}, i32 0
// CHECK-COUNT-3: insertelement <4 x i32> {{[0-9a-zA-Z_%.]+}}, i32 {{[0-9a-zA-Z_%.]+}}, i32 {{[1-3]}}

// CHECK-LABEL: define available_externally <2 x i64> @_mm_set_epi64
// CHECK: call <2 x i64> @_mm_set_epi64x(i64 noundef %{{[0-9a-zA-Z_.]+}}, i64 noundef %{{[0-9a-zA-Z_.]+}})

// CHECK-LABEL: define available_externally <2 x i64> @_mm_set_epi64x
// CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <2 x i64> undef, i64 %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <2 x i64> poison, i64 %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: insertelement <2 x i64> %[[VEC]], i64 %{{[0-9a-zA-Z_.]+}}, i32 1

// CHECK-LABEL: define available_externally <2 x i64> @_mm_set_epi8
// CHECK-COUNT-16: store i8 {{[0-9a-zA-Z_%.]+}}, ptr {{[0-9a-zA-Z_%.]+}}, align 1
// CHECK: insertelement <16 x i8> undef, i8 {{[0-9a-zA-Z_%.]+}}, i32 {{[0-9]+}}
// CHECK: insertelement <16 x i8> poison, i8 {{[0-9a-zA-Z_%.]+}}, i32 {{[0-9]+}}
// CHECK-COUNT-15: {{[0-9a-zA-Z_%.]+}} = insertelement <16 x i8> {{[0-9a-zA-Z_%.]+}}, i8 {{[0-9a-zA-Z_%.]+}}, i32 {{[0-9]+}}

// CHECK-LABEL: define available_externally <2 x double> @_mm_set_pd
// CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <2 x double> undef, double %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <2 x double> poison, double %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: insertelement <2 x double> %[[VEC]], double %{{[0-9a-zA-Z_.]+}}, i32 1

// CHECK-LABEL: define available_externally <2 x double> @_mm_set_pd1
// CHECK: call <2 x double> @_mm_set1_pd(double noundef %{{[0-9a-zA-Z_.]+}})

// CHECK-LABEL: define available_externally <2 x double> @_mm_set_sd
// CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <2 x double> undef, double %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <2 x double> poison, double %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: insertelement <2 x double> %[[VEC]], double 0.000000e+00, i32 1

// CHECK-LABEL: define available_externally <2 x i64> @_mm_set1_epi16
Expand All @@ -960,7 +960,7 @@ test_set() {
// CHECK: call <2 x i64> @_mm_set_epi8

// CHECK-LABEL: define available_externally <2 x double> @_mm_set1_pd
// CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <2 x double> undef, double %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <2 x double> poison, double %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: insertelement <2 x double> %[[VEC]], double %{{[0-9a-zA-Z_.]+}}, i32 1

// CHECK-LABEL: define available_externally <2 x i64> @_mm_setr_epi16
Expand All @@ -981,7 +981,7 @@ test_set() {
// CHECK: call <2 x i64> @_mm_set_epi8

// CHECK-LABEL: define available_externally <2 x double> @_mm_setr_pd
// CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <2 x double> undef, double %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <2 x double> poison, double %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: insertelement <2 x double> %[[VEC]], double %{{[0-9a-zA-Z_.]+}}, i32 1

// CHECK-LABEL: define available_externally <2 x double> @_mm_setzero_pd()
Expand Down
12 changes: 6 additions & 6 deletions clang/test/CodeGen/PowerPC/ppc-xmmintrin.c
Original file line number Diff line number Diff line change
Expand Up @@ -796,8 +796,8 @@ test_sad() {

// CHECK-LABEL: define available_externally i64 @_mm_sad_pu8
// CHECK: call void @llvm.memset.p0.i64(ptr align 8 %{{[0-9a-zA-Z_.]+}}, i8 0, i64 8, i1 false)
// CHECK: insertelement <2 x i64> <i64 0, i64 undef>, i64 %{{[0-9a-zA-Z_.]+}}, i32 1
// CHECK: insertelement <2 x i64> <i64 0, i64 undef>, i64 %{{[0-9a-zA-Z_.]+}}, i32 1
// CHECK: insertelement <2 x i64> <i64 0, i64 poison>, i64 %{{[0-9a-zA-Z_.]+}}, i32 1
// CHECK: insertelement <2 x i64> <i64 0, i64 poison>, i64 %{{[0-9a-zA-Z_.]+}}, i32 1
// CHECK: call <16 x i8> @vec_min(unsigned char vector[16], unsigned char vector[16])
// CHECK: call <16 x i8> @vec_max(unsigned char vector[16], unsigned char vector[16])
// CHECK: call <16 x i8> @vec_sub(unsigned char vector[16], unsigned char vector[16])
Expand All @@ -823,7 +823,7 @@ test_set() {
// CHECK-LABEL: @test_set

// CHECK-LABEL: define available_externally <4 x float> @_mm_set_ps
// CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <4 x float> undef, float %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <4 x float> poison, float %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[VEC2:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC]], float %{{[0-9a-zA-Z_.]+}}, i32 1
// CHECK: %[[VEC3:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC2]], float %{{[0-9a-zA-Z_.]+}}, i32 2
// CHECK: %[[VEC4:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC3]], float %{{[0-9a-zA-Z_.]+}}, i32 3
Expand All @@ -833,21 +833,21 @@ test_set() {
// CHECK: call <4 x float> @_mm_set1_ps

// CHECK-LABEL: define available_externally <4 x float> @_mm_set_ss
// CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <4 x float> undef, float %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <4 x float> poison, float %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[VEC2:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC]], float 0.000000e+00, i32 1
// CHECK: %[[VEC3:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC2]], float 0.000000e+00, i32 2
// CHECK: %[[VEC4:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC3]], float 0.000000e+00, i32 3
// CHECK: store <4 x float> %[[VEC4]], ptr %{{[0-9a-zA-Z_.]+}}, align 16

// CHECK-LABEL: define available_externally <4 x float> @_mm_set1_ps
// CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <4 x float> undef, float %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <4 x float> poison, float %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[VEC2:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC]], float %{{[0-9a-zA-Z_.]+}}, i32 1
// CHECK: %[[VEC3:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC2]], float %{{[0-9a-zA-Z_.]+}}, i32 2
// CHECK: %[[VEC4:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC3]], float %{{[0-9a-zA-Z_.]+}}, i32 3
// CHECK: store <4 x float> %[[VEC4]], ptr %{{[0-9a-zA-Z_.]+}}, align 16

// CHECK-LABEL: define available_externally <4 x float> @_mm_setr_ps
// CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <4 x float> undef, float %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <4 x float> poison, float %{{[0-9a-zA-Z_.]+}}, i32 0
// CHECK: %[[VEC2:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC]], float %{{[0-9a-zA-Z_.]+}}, i32 1
// CHECK: %[[VEC3:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC2]], float %{{[0-9a-zA-Z_.]+}}, i32 2
// CHECK: %[[VEC4:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC3]], float %{{[0-9a-zA-Z_.]+}}, i32 3
Expand Down
Loading

0 comments on commit a3d2d34

Please sign in to comment.