Skip to content

Commit

Permalink
[X86] When using AND+PACKUS in lowerV16I8Shuffle, generate the build vector directly in v16i8 with the correct 0x00 or 0xFF elements rather than using another VT and bitcasting it.
Browse files Browse the repository at this point in the history

The build_vector will become a constant pool load. Building it in the
desired type from the start ensures we don't generate a bitcast
of the constant pool load, which would then need to be folded with
the load.

While experimenting with another patch, I noticed that
SimplifyDemandedBits can't handle the case where the load type and
the constant pool type don't match. While we should probably
fix that, this was a simple way to fix the issue I saw.

llvm-svn: 366732
  • Loading branch information
topperc committed Jul 22, 2019
1 parent 8dd563e commit 510e6fa
Show file tree
Hide file tree
Showing 16 changed files with 51 additions and 52 deletions.
9 changes: 4 additions & 5 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14157,11 +14157,10 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// First we need to zero all the dropped bytes.
assert(NumEvenDrops <= 3 &&
"No support for dropping even elements more than 3 times.");
// We use the mask type to pick which bytes are preserved based on how many
// elements are dropped.
MVT MaskVTs[] = { MVT::v8i16, MVT::v4i32, MVT::v2i64 };
SDValue ByteClearMask = DAG.getBitcast(
MVT::v16i8, DAG.getConstant(0xFF, DL, MaskVTs[NumEvenDrops - 1]));
SmallVector<SDValue, 16> ByteClearOps(16, DAG.getConstant(0, DL, MVT::i8));
for (unsigned i = 0; i != 16; i += 1 << NumEvenDrops)
ByteClearOps[i] = DAG.getConstant(0xFF, DL, MVT::i8);
SDValue ByteClearMask = DAG.getBuildVector(MVT::v16i8, DL, ByteClearOps);
V1 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V1, ByteClearMask);
if (!IsSingleInput)
V2 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V2, ByteClearMask);
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/avg.ll
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ define void @avg_v48i8(<48 x i8>* %a, <48 x i8>* %b) nounwind {
; SSE2-NEXT: psubd %xmm5, %xmm0
; SSE2-NEXT: psrld $1, %xmm3
; SSE2-NEXT: psrld $1, %xmm8
; SSE2-NEXT: movdqa {{.*#+}} xmm7 = [255,255,255,255]
; SSE2-NEXT: movdqa {{.*#+}} xmm7 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE2-NEXT: pand %xmm7, %xmm8
; SSE2-NEXT: pand %xmm7, %xmm3
; SSE2-NEXT: packuswb %xmm8, %xmm3
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/masked_store_trunc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4673,7 +4673,7 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm7, %xmm7
; SSE2-NEXT: pcmpeqb %xmm4, %xmm7
; SSE2-NEXT: movdqa {{.*#+}} xmm6 = [255,255,255,255,255,255,255,255]
; SSE2-NEXT: movdqa {{.*#+}} xmm6 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE2-NEXT: pand %xmm6, %xmm1
; SSE2-NEXT: pand %xmm6, %xmm0
; SSE2-NEXT: packuswb %xmm1, %xmm0
Expand Down Expand Up @@ -6209,7 +6209,7 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm3, %xmm3
; SSE2-NEXT: pcmpeqb %xmm2, %xmm3
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE2-NEXT: pand %xmm4, %xmm1
; SSE2-NEXT: pand %xmm4, %xmm0
; SSE2-NEXT: packuswb %xmm1, %xmm0
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/mmx-arith.ll
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ define void @test0(x86_mmx* %A, x86_mmx* %B) {
; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X32-NEXT: pmullw %xmm0, %xmm1
; X32-NEXT: movdqa {{.*#+}} xmm0 = [255,255,255,255,255,255,255,255]
; X32-NEXT: movdqa {{.*#+}} xmm0 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; X32-NEXT: movdqa %xmm1, %xmm2
; X32-NEXT: pand %xmm0, %xmm2
; X32-NEXT: packuswb %xmm2, %xmm2
Expand Down Expand Up @@ -100,7 +100,7 @@ define void @test0(x86_mmx* %A, x86_mmx* %B) {
; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X64-NEXT: pmullw %xmm0, %xmm1
; X64-NEXT: movdqa {{.*#+}} xmm0 = [255,255,255,255,255,255,255,255]
; X64-NEXT: movdqa {{.*#+}} xmm0 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; X64-NEXT: movdqa %xmm1, %xmm2
; X64-NEXT: pand %xmm0, %xmm2
; X64-NEXT: packuswb %xmm2, %xmm2
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/oddshuffles.ll
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ define void @v7i8(<4 x i8> %a, <4 x i8> %b, <7 x i8>* %p) nounwind {
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pandn %xmm0, %xmm2
; SSE2-NEXT: por %xmm1, %xmm2
; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [255,255,255,255,255,255,255,255]
; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: packuswb %xmm0, %xmm0
; SSE2-NEXT: movdqa %xmm2, -{{[0-9]+}}(%rsp)
Expand Down Expand Up @@ -659,7 +659,7 @@ define void @v12i32(<8 x i32> %a, <8 x i32> %b, <12 x i32>* %p) nounwind {
define void @pr29025(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <12 x i8> *%p) nounwind {
; SSE2-LABEL: pr29025:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255]
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE2-NEXT: pand %xmm3, %xmm1
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: packuswb %xmm1, %xmm0
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/oddsubvector.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ define void @insert_v7i8_v2i16_2(<7 x i8> *%a0, <2 x i16> *%a1) nounwind {
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,3]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0,1,3]
; SSE2-NEXT: movaps {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
; SSE2-NEXT: movaps {{.*#+}} xmm1 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE2-NEXT: andps %xmm0, %xmm1
; SSE2-NEXT: packuswb %xmm1, %xmm1
; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/psubus.ll
Original file line number Diff line number Diff line change
Expand Up @@ -622,7 +622,7 @@ define <16 x i8> @test14(<16 x i8> %x, <16 x i32> %y) nounwind {
; SSE2-NEXT: psubd %xmm5, %xmm4
; SSE2-NEXT: por %xmm0, %xmm5
; SSE2-NEXT: pcmpgtd %xmm9, %xmm5
; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [255,255,255,255]
; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE2-NEXT: pand %xmm9, %xmm5
; SSE2-NEXT: movdqa %xmm3, %xmm7
; SSE2-NEXT: pxor %xmm0, %xmm7
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/sse2-intrinsics-canonical.ll
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
define <8 x i8> @test_x86_sse2_paddus_b_64(<8 x i8> %a0, <8 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_paddus_b_64:
; SSE: ## %bb.0:
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE-NEXT: ## encoding: [0x66,0x0f,0x6f,0x15,A,A,A,A]
; SSE-NEXT: ## fixup A - offset: 4, value: LCPI4_0, kind: FK_Data_4
; SSE-NEXT: pand %xmm2, %xmm1 ## encoding: [0x66,0x0f,0xdb,0xca]
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,7 @@ define i1 @trunc_v8i32_v8i1(<8 x i32>) {
define i1 @trunc_v16i16_v16i1(<16 x i16>) {
; SSE2-LABEL: trunc_v16i16_v16i1:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: packuswb %xmm1, %xmm0
Expand Down Expand Up @@ -690,7 +690,7 @@ define i1 @trunc_v16i32_v16i1(<16 x i32>) {
define i1 @trunc_v32i16_v32i1(<32 x i16>) {
; SSE2-LABEL: trunc_v32i16_v32i1:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE2-NEXT: pand %xmm4, %xmm3
; SSE2-NEXT: pand %xmm4, %xmm2
; SSE2-NEXT: packuswb %xmm3, %xmm2
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,7 @@ define i1 @trunc_v8i32_v8i1(<8 x i32>) {
define i1 @trunc_v16i16_v16i1(<16 x i16>) {
; SSE2-LABEL: trunc_v16i16_v16i1:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: packuswb %xmm1, %xmm0
Expand Down Expand Up @@ -681,7 +681,7 @@ define i1 @trunc_v16i32_v16i1(<16 x i32>) {
define i1 @trunc_v32i16_v32i1(<32 x i16>) {
; SSE2-LABEL: trunc_v32i16_v32i1:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE2-NEXT: pand %xmm4, %xmm3
; SSE2-NEXT: pand %xmm4, %xmm2
; SSE2-NEXT: packuswb %xmm3, %xmm2
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ define i1 @trunc_v8i32_v8i1(<8 x i32>) {
define i1 @trunc_v16i16_v16i1(<16 x i16>) {
; SSE2-LABEL: trunc_v16i16_v16i1:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: packuswb %xmm1, %xmm0
Expand Down Expand Up @@ -767,7 +767,7 @@ define i1 @trunc_v16i32_v16i1(<16 x i32>) {
define i1 @trunc_v32i16_v32i1(<32 x i16>) {
; SSE2-LABEL: trunc_v32i16_v32i1:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE2-NEXT: pand %xmm4, %xmm3
; SSE2-NEXT: pand %xmm4, %xmm2
; SSE2-NEXT: packuswb %xmm3, %xmm2
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1688,7 +1688,7 @@ define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_01_02_03_04_05_06(
define <16 x i8> @PR12412(<16 x i8> %inval1, <16 x i8> %inval2) {
; SSE2-LABEL: PR12412:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: packuswb %xmm1, %xmm0
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/X86/vector-trunc-math-widen.ll
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,7 @@ define <16 x i8> @trunc_add_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwin
; SSE: # %bb.0:
; SSE-NEXT: paddw %xmm2, %xmm0
; SSE-NEXT: paddw %xmm3, %xmm1
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
Expand Down Expand Up @@ -812,7 +812,7 @@ define <16 x i8> @trunc_add_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
define <16 x i8> @trunc_add_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
; SSE-LABEL: trunc_add_const_v16i16_v16i8:
; SSE: # %bb.0:
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
Expand Down Expand Up @@ -1247,7 +1247,7 @@ define <16 x i8> @trunc_sub_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwin
; SSE: # %bb.0:
; SSE-NEXT: psubw %xmm2, %xmm0
; SSE-NEXT: psubw %xmm3, %xmm1
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
Expand Down Expand Up @@ -1638,7 +1638,7 @@ define <16 x i8> @trunc_sub_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
define <16 x i8> @trunc_sub_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
; SSE-LABEL: trunc_sub_const_v16i16_v16i8:
; SSE: # %bb.0:
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
Expand Down Expand Up @@ -2240,7 +2240,7 @@ define <16 x i8> @trunc_mul_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwin
; SSE: # %bb.0:
; SSE-NEXT: pmullw %xmm2, %xmm0
; SSE-NEXT: pmullw %xmm3, %xmm1
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
Expand Down Expand Up @@ -2728,7 +2728,7 @@ define <16 x i8> @trunc_mul_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
; SSE: # %bb.0:
; SSE-NEXT: pmullw {{.*}}(%rip), %xmm0
; SSE-NEXT: pmullw {{.*}}(%rip), %xmm1
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
Expand Down Expand Up @@ -3135,7 +3135,7 @@ define <16 x i8> @trunc_and_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwin
define <16 x i8> @trunc_and_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwind {
; SSE-LABEL: trunc_and_v16i16_v16i8:
; SSE: # %bb.0:
; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE-NEXT: pand %xmm4, %xmm3
; SSE-NEXT: pand %xmm1, %xmm3
; SSE-NEXT: pand %xmm4, %xmm2
Expand Down Expand Up @@ -3507,7 +3507,7 @@ define <16 x i8> @trunc_and_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
define <16 x i8> @trunc_and_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
; SSE-LABEL: trunc_and_const_v16i16_v16i8:
; SSE: # %bb.0:
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
Expand Down Expand Up @@ -3914,7 +3914,7 @@ define <16 x i8> @trunc_xor_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwin
; SSE: # %bb.0:
; SSE-NEXT: pxor %xmm2, %xmm0
; SSE-NEXT: pxor %xmm3, %xmm1
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
Expand Down Expand Up @@ -4284,7 +4284,7 @@ define <16 x i8> @trunc_xor_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
define <16 x i8> @trunc_xor_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
; SSE-LABEL: trunc_xor_const_v16i16_v16i8:
; SSE: # %bb.0:
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
Expand Down Expand Up @@ -4691,7 +4691,7 @@ define <16 x i8> @trunc_or_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwind
; SSE: # %bb.0:
; SSE-NEXT: por %xmm2, %xmm0
; SSE-NEXT: por %xmm3, %xmm1
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
Expand Down Expand Up @@ -5061,7 +5061,7 @@ define <16 x i8> @trunc_or_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
define <16 x i8> @trunc_or_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
; SSE-LABEL: trunc_or_const_v16i16_v16i8:
; SSE: # %bb.0:
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
Expand Down
Loading

0 comments on commit 510e6fa

Please sign in to comment.